From 1e8341ae0c0e117f0626cd6cf6732a0a9c8723f2 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 12 May 2013 07:26:22 +0800 Subject: powerpc: Move the patch_exception to a common place So that it can be used by other codes. No function change. Signed-off-by: Kevin Hao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/code-patching.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index a6f8c7a5cbb7..97e02f985df8 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -34,6 +34,13 @@ int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr); unsigned long branch_target(const unsigned int *instr); unsigned int translate_branch(const unsigned int *dest, const unsigned int *src); +#ifdef CONFIG_PPC_BOOK3E_64 +void __patch_exception(int exc, unsigned long addr); +#define patch_exception(exc, name) do { \ + extern unsigned int name; \ + __patch_exception((exc), (unsigned long)&name); \ +} while (0) +#endif static inline unsigned long ppc_function_entry(void *func) { -- cgit v1.2.3 From 0ce636700c5bad54eda0e62903a1803f6d67b31d Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 22 Aug 2013 09:30:35 +0800 Subject: powerpc: purge all the prefetched instructions for the coherent icache flush As Benjamin Herrenschmidt has indicated, we still need a dummy icbi to purge all the prefetched instructions from the ifetch buffers for the snooping icache. We also need a sync before the icbi to order the actual stores to memory that might have modified instructions with the icbi. Signed-off-by: Kevin Hao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cache.h | 14 +++++++++++++- arch/powerpc/kernel/misc_32.S | 4 +++- arch/powerpc/kernel/misc_64.S | 6 ++++++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 9e495c9a6a88..ed0afc1e44a4 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -41,8 +41,20 @@ struct ppc64_caches { extern struct ppc64_caches ppc64_caches; #endif /* __powerpc64__ && ! __ASSEMBLY__ */ -#if !defined(__ASSEMBLY__) +#if defined(__ASSEMBLY__) +/* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ +#define PURGE_PREFETCHED_INS \ + sync; \ + icbi 0,r3; \ + sync; \ + isync +#else #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_6xx diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e47d268727a4..879f09620f83 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -344,7 +344,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) */ _KPROBE(flush_icache_range) BEGIN_FTR_SECTION - isync + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) li r5,L1_CACHE_BYTES-1 @@ -448,6 +448,7 @@ _GLOBAL(invalidate_dcache_range) */ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ @@ -489,6 +490,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) */ _GLOBAL(__flush_dcache_icache_phys) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr /* for 601, do nothing */ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mfmsr r10 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index e59caf874d05..a9f7a79a3a40 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -67,6 +67,7 @@ PPC64_CACHES: _KPROBE(flush_icache_range) BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* @@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache) * Different systems have different cache line sizes */ +BEGIN_FTR_SECTION + PURGE_PREFETCHED_INS + blr +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ -- cgit v1.2.3 From c041cfa2af1ccb8d0346dc576144a1085e9b4d4b Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Wed, 23 Jan 2013 16:06:11 +0800 Subject: powerpc: Make irq_stat.timers_irqs counting more specific Current irq_stat.timers_irqs counting doesn't discriminate timer event handler and other timer interrupt(like arch_irq_work_raise). Sometimes we need to know exactly how much interrupts timer event handler fired, so let's be more specific on this. Signed-off-by: Fan Du Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hardirq.h | 3 ++- arch/powerpc/kernel/irq.c | 12 +++++++++--- arch/powerpc/kernel/time.c | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 3bdcfce2c42a..418fb654370d 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -6,7 +6,8 @@ typedef struct { unsigned int __softirq_pending; - unsigned int timer_irqs; + unsigned int timer_irqs_event; + unsigned int timer_irqs_others; unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ba0165615215..9729b23bfb0a 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); - seq_printf(p, " Local timer interrupts\n"); + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_printf(p, " Local timer interrupts for timer event device\n"); + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_printf(p, " Local timer interrupts for others\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) @@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) */ u64 arch_irq_stat_cpu(unsigned int cpu) { - u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; sum += per_cpu(irq_stat, cpu).pmu_irqs; sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; + sum += per_cpu(irq_stat, cpu).timer_irqs_others; #ifdef CONFIG_PPC_DOORBELL sum += per_cpu(irq_stat, cpu).doorbell_irqs; #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b3b144121cc9..afb1b56ef4fa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -510,7 +510,6 @@ void timer_interrupt(struct pt_regs * regs) */ may_hard_irq_enable(); - __get_cpu_var(irq_stat).timer_irqs++; #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -532,10 +531,12 @@ void timer_interrupt(struct pt_regs * regs) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); + __get_cpu_var(irq_stat).timer_irqs_event++; } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + __get_cpu_var(irq_stat).timer_irqs_others++; } #ifdef CONFIG_PPC64 -- cgit v1.2.3 From b14a7253cf999412e5a0dd39d58b0a42d19fd73a Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:03:51 +0530 Subject: powerpc/book3s: Split the common exception prolog logic into two section. This patch splits the common exception prolog logic into three parts to facilitate reuse of existing code in the next patch. This patch also re-arranges few instructions in such a way that the second part now deals with saving register values from paca save area to stack frame, and the third part deals with saving current register values to stack frame. The second and third part will be reused in the machine check exception routine in the subsequent patch. Please note that this patch does not introduce or change existing code logic. Instead it is just a code movement and instruction re-ordering. Patch Acked-by Paul. But made some minor modification (explained above) to address Paul's comment in the later patch(3). Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 894662a5d4d5..ff4e2e80856d 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -301,9 +301,12 @@ do_kvm_##n: \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r9, r10); \ SAVE_PPR(area, r9, r10); \ -4: std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ +4: EXCEPTION_PROLOG_COMMON_2(area) \ + EXCEPTION_PROLOG_COMMON_3(n) \ + ACCOUNT_STOLEN_TIME + +/* Save original regs values from save area to stack frame. */ +#define EXCEPTION_PROLOG_COMMON_2(area) \ ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ ld r10,area+EX_R10(r13); \ std r9,GPR9(r1); \ @@ -318,11 +321,16 @@ do_kvm_##n: \ ld r10,area+EX_CFAR(r13); \ std r10,ORIG_GPR3(r1); \ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ + GET_CTR(r10, area); \ + std r10,_CTR(r1); + +#define EXCEPTION_PROLOG_COMMON_3(n) \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ mflr r9; /* Get LR, later save to stack */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ std r9,_LINK(r1); \ - GET_CTR(r10, area); \ - std r10,_CTR(r1); \ lbz r10,PACASOFTIRQEN(r13); \ mfspr r11,SPRN_XER; /* save XER in stackframe */ \ std r10,SOFTE(r1); \ @@ -332,8 +340,7 @@ do_kvm_##n: \ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ - ACCOUNT_STOLEN_TIME + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ /* * Exception vectors. -- cgit v1.2.3 From 729b0f715371ce1e7636b4958fc45d6882442456 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:00 +0530 Subject: powerpc/book3s: Introduce exclusive emergency stack for machine check exception. This patch introduces exclusive emergency stack for machine check exception. We use emergency stack to handle machine check exception so that we can save MCE information (srr1, srr0, dar and dsisr) before turning on ME bit and be ready for re-entrancy. This helps us to prevent clobbering of MCE information in case of nested machine checks. The reason for using emergency stack over normal kernel stack is that the machine check might occur in the middle of setting up a stack frame which may result into improper use of kernel stack. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/paca.h | 9 +++++++++ arch/powerpc/kernel/setup_64.c | 10 +++++++++- arch/powerpc/xmon/xmon.c | 4 ++++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b6ea9e068c13..c3523d1dda58 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -152,6 +152,15 @@ struct paca_struct { */ struct opal_machine_check_event *opal_mc_evt; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* Exclusive emergency stack pointer for machine check exception. */ + void *mc_emergency_sp; + /* + * Flag to check whether we are in machine check early handler + * and already using emergency stack. + */ + u16 in_mce; +#endif /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5760b9bea936..2232aff66059 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -540,7 +540,8 @@ static void __init exc_lvl_early_init(void) /* * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. + * early in SMP boots before relocation is enabled. Exclusive emergency + * stack for machine checks. */ static void __init emergency_stack_init(void) { @@ -563,6 +564,13 @@ static void __init emergency_stack_init(void) sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for machine check exception handling. */ + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp += THREAD_SIZE; + paca[i].mc_emergency_sp = __va(sp); +#endif } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index af9d3469fb99..a90731b3d44a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2051,6 +2051,10 @@ static void dump_one_paca(int cpu) DUMP(p, stab_addr, "lx"); #endif DUMP(p, emergency_sp, "p"); +#ifdef CONFIG_PPC_BOOK3S_64 + DUMP(p, mc_emergency_sp, "p"); + DUMP(p, in_mce, "x"); +#endif DUMP(p, data_offset, "lx"); DUMP(p, hw_cpu_id, "x"); DUMP(p, cpu_start, "x"); -- cgit v1.2.3 From 4c703416efc0a23f83a282b9240bb92fbd9e0be9 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:40 +0530 Subject: powerpc/book3s: Introduce a early machine check hook in cpu_spec. This patch adds the early machine check function pointer in cputable for CPU specific early machine check handling. The early machine handle routine will be called in real mode to handle SLB and TLB errors. We can not reuse the existing machine_check hook because it is always invoked in kernel virtual mode and we would already be in trouble if we get SLB or TLB errors. This patch just sets up a mechanism to invoke CPU specific handler. The subsequent patches will populate the function pointer. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 7 +++++++ arch/powerpc/kernel/traps.c | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 0d4939ba48e7..c5b107afca4d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -90,6 +90,13 @@ struct cpu_spec { * if the error is fatal, 1 if it was fully recovered and 0 to * pass up (not CPU originated) */ int (*machine_check)(struct pt_regs *regs); + + /* + * Processor specific early machine check handler which is + * called in real mode to handle SLB and TLB errors. + */ + long (*machine_check_early)(struct pt_regs *regs); + }; extern struct cpu_spec *cur_cpu_spec; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 97a1ce72e130..330841766b09 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -293,8 +293,11 @@ void system_reset_exception(struct pt_regs *regs) */ long machine_check_early(struct pt_regs *regs) { - /* TODO: handle/decode machine check reason */ - return 0; + long handled = 0; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + return handled; } #endif -- cgit v1.2.3 From 0440705049b041d84268ea57f6e90e2f16618897 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:04:56 +0530 Subject: powerpc/book3s: Add flush_tlb operation in cpu_spec. This patch introduces flush_tlb operation in cpu_spec structure. This will help us to invoke appropriate CPU-side flush tlb routine. This patch adds the foundation to invoke CPU specific flush routine for respective architectures. Currently this patch introduce flush_tlb for p7 and p8. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 5 +++++ arch/powerpc/kernel/cpu_setup_power.S | 38 +++++++++++++++++++++++++---------- arch/powerpc/kernel/cputable.c | 8 ++++++++ arch/powerpc/kvm/book3s_hv_ras.c | 18 ++++------------- 4 files changed, 44 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index c5b107afca4d..617cc767c076 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -97,6 +97,11 @@ struct cpu_spec { */ long (*machine_check_early)(struct pt_regs *regs); + /* + * Processor specific routine to flush tlbs. + */ + void (*flush_tlb)(unsigned long inval_selector); + }; extern struct cpu_spec *cur_cpu_spec; diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 18b5b9cf8e37..37d1bb002aa9 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR bl __init_LPCR - bl __init_TLB + bl __init_tlb_power7 mtlr r11 blr @@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8) oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8) oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR - bl __init_TLB + bl __init_tlb_power8 bl __init_PMU_HV mtlr r11 blr @@ -134,15 +134,31 @@ __init_HFSCR: mtspr SPRN_HFSCR,r3 blr -__init_TLB: - /* - * Clear the TLB using the "IS 3" form of tlbiel instruction - * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 - * so we just always do 512 - */ +/* + * Clear the TLB using the specified IS form of tlbiel instruction + * (invalidate by congruence class). P7 has 128 CCs., P8 has 512. + * + * r3 = IS field + */ +__init_tlb_power7: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power7) + li r6,128 + mtctr r6 + mr r7,r3 /* IS field */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr + +__init_tlb_power8: + li r3,0xc00 /* IS field = 0b11 */ +_GLOBAL(__flush_tlb_power8) li r6,512 mtctr r6 - li r7,0xc00 /* IS field = 0b11 */ + mr r7,r3 /* IS field */ ptesync 2: tlbiel r7 addi r7,r7,0x1000 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 597d954e5860..55d0f9c282b8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -71,6 +71,8 @@ extern void __restore_cpu_power7(void); extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); +extern void __flush_tlb_power7(unsigned long inval_selector); +extern void __flush_tlb_power8(unsigned long inval_selector); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -440,6 +442,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7", }, { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ @@ -456,6 +459,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc64/ibm-compat-v1", .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Power7 */ @@ -474,6 +478,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7", }, { /* Power7+ */ @@ -492,6 +497,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .flush_tlb = __flush_tlb_power7, .platform = "power7+", }, { /* Power8E */ @@ -510,6 +516,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Power8 */ @@ -528,6 +535,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_INVALID, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, .platform = "power8", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index a353c485808c..5c427b41a2f5 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -58,18 +58,6 @@ static void reload_slb(struct kvm_vcpu *vcpu) } } -/* POWER7 TLB flush */ -static void flush_tlb_power7(struct kvm_vcpu *vcpu) -{ - unsigned long i, rb; - - rb = TLBIEL_INVAL_SET_LPID; - for (i = 0; i < POWER7_TLB_SETS; ++i) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } -} - /* * On POWER7, see if we can handle a machine check that occurred inside * the guest in real mode, without switching to the host partition. @@ -96,7 +84,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI); } if (dsisr & DSISR_MC_TLB_MULTI) { - flush_tlb_power7(vcpu); + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); dsisr &= ~DSISR_MC_TLB_MULTI; } /* Any other errors we don't understand? */ @@ -113,7 +102,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) reload_slb(vcpu); break; case SRR1_MC_IFETCH_TLBMULTI: - flush_tlb_power7(vcpu); + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID); break; default: handled = 0; -- cgit v1.2.3 From e22a22740c1ac23aaa10835f026b3549ee3e4e75 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:11 +0530 Subject: powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7. If we get a machine check exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to recover. We do this in real mode before turning on MMU. Otherwise we would run into nested machine checks. If we get a machine check when we are in guest, then just flush the SLBs and continue. This patch handles errors for power7. The next patch will handle errors for power8 Signed-off-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/bitops.h | 5 ++ arch/powerpc/include/asm/mce.h | 67 +++++++++++++++++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/cputable.c | 4 + arch/powerpc/kernel/mce_power.c | 150 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 arch/powerpc/include/asm/mce.h create mode 100644 arch/powerpc/kernel/mce_power.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 910194e9a1e2..a5e9a7d494d8 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -46,6 +46,11 @@ #include #include +/* PPC bit number conversion */ +#define PPC_BITLSHIFT(be) (BITS_PER_LONG - 1 - (be)) +#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) +#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) + /* * clear_bit doesn't imply a memory barrier */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h new file mode 100644 index 000000000000..8157d4eaead6 --- /dev/null +++ b/arch/powerpc/include/asm/mce.h @@ -0,0 +1,67 @@ +/* + * Machine check exception header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#ifndef __ASM_PPC64_MCE_H__ +#define __ASM_PPC64_MCE_H__ + +#include + +/* + * Machine Check bits on power7 and power8 + */ +#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */ + +/* SRR1 bits for machine check (On Power7 and Power8) */ +#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */ + +#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45)) +#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */ +#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45)) + +/* SRR1 bits for machine check (On Power8) */ +#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45)) + +/* DSISR bits for machine check (On Power7 and Power8) */ +#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */ +#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */ +#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */ +#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */ +#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */ +#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */ +#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */ + +/* + * DSISR bits for machine check (Power8) in addition to above. + * Secondary DERAT Multihit + */ +#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54)) + +/* SLB error bits */ +#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \ + P7_DSISR_MC_SLB_PARITY_MFSLB | \ + P7_DSISR_MC_SLB_MULTIHIT | \ + P7_DSISR_MC_SLB_MULTIHIT_PARITY) + +#endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 445cb6e39d5b..07c63d0aa759 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 55d0f9c282b8..c54188bcdd9e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -73,6 +73,7 @@ extern void __restore_cpu_power8(void); extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned long inval_selector); extern void __flush_tlb_power8(unsigned long inval_selector); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -443,6 +444,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ @@ -479,6 +481,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7", }, { /* Power7+ */ @@ -498,6 +501,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, .flush_tlb = __flush_tlb_power7, + .machine_check_early = __machine_check_early_realmode_p7, .platform = "power7+", }, { /* Power8E */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c new file mode 100644 index 000000000000..690547319b03 --- /dev/null +++ b/arch/powerpc/kernel/mce_power.c @@ -0,0 +1,150 @@ +/* + * Machine check exception handling CPU-side for power7 and power8 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce_power: " fmt + +#include +#include +#include +#include + +/* flush SLBs and reload */ +static void flush_and_reload_slb(void) +{ + struct slb_shadow *slb; + unsigned long i, n; + + /* Invalidate all SLBs */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest or in transition, we will + * only flush the SLBs and continue. + */ + if (get_paca()->kvm_hstate.in_guest) + return; +#endif + + /* For host kernel, reload the SLBs from shadow SLB buffer. */ + slb = get_slb_shadow(); + if (!slb) + return; + + n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + + /* Load up the SLB entries from shadow SLB */ + for (i = 0; i < n; i++) { + unsigned long rb = slb->save_area[i].esid; + unsigned long rs = slb->save_area[i].vsid; + + rb = (rb & ~0xFFFul) | i; + asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); + } +} + +static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) +{ + long handled = 1; + + /* + * flush and reload SLBs for SLB errors and flush TLBs for TLB errors. + * reset the error bits whenever we handle them so that at the end + * we can check whether we handled all of them or not. + * */ + if (dsisr & slb_error_bits) { + flush_and_reload_slb(); + /* reset error bits */ + dsisr &= ~(slb_error_bits); + } + if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + /* reset error bits */ + dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB; + } + /* Any other errors we don't understand? */ + if (dsisr & 0xffffffffUL) + handled = 0; + + return handled; +} + +static long mce_handle_derror_p7(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS); +} + +static long mce_handle_common_ierror(uint64_t srr1) +{ + long handled = 0; + + switch (P7_SRR1_MC_IFETCH(srr1)) { + case 0: + break; + case P7_SRR1_MC_IFETCH_SLB_PARITY: + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + /* flush and reload SLBs for SLB errors. */ + flush_and_reload_slb(); + handled = 1; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE); + handled = 1; + } + break; + default: + break; + } + + return handled; +} + +static long mce_handle_ierror_p7(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +long __machine_check_early_realmode_p7(struct pt_regs *regs) +{ + uint64_t srr1; + long handled = 1; + + srr1 = regs->msr; + + if (P7_SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror_p7(regs->dsisr); + else + handled = mce_handle_ierror_p7(srr1); + + /* TODO: Decode machine check reason. */ + return handled; +} -- cgit v1.2.3 From ae744f3432d3872c51298d922728e13c24ccc068 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:26 +0530 Subject: powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8. This patch handles the memory errors on power8. If we get a machine check exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to recover. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 3 +++ arch/powerpc/kernel/cputable.c | 4 ++++ arch/powerpc/kernel/mce_power.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 8157d4eaead6..e3ffa825b970 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -64,4 +64,7 @@ P7_DSISR_MC_SLB_MULTIHIT | \ P7_DSISR_MC_SLB_MULTIHIT_PARITY) +#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ + P8_DSISR_MC_ERAT_MULTIHIT_SEC) + #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c54188bcdd9e..6c8dd5da4de5 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -74,6 +74,7 @@ extern void __restore_cpu_a2(void); extern void __flush_tlb_power7(unsigned long inval_selector); extern void __flush_tlb_power8(unsigned long inval_selector); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -462,6 +463,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Power7 */ @@ -521,6 +523,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Power8 */ @@ -540,6 +543,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 690547319b03..60a217f11275 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -148,3 +148,37 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* TODO: Decode machine check reason. */ return handled; } + +static long mce_handle_ierror_p8(uint64_t srr1) +{ + long handled = 0; + + handled = mce_handle_common_ierror(srr1); + + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + flush_and_reload_slb(); + handled = 1; + } + return handled; +} + +static long mce_handle_derror_p8(uint64_t dsisr) +{ + return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS); +} + +long __machine_check_early_realmode_p8(struct pt_regs *regs) +{ + uint64_t srr1; + long handled = 1; + + srr1 = regs->msr; + + if (P7_SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror_p8(regs->dsisr); + else + handled = mce_handle_ierror_p8(srr1); + + /* TODO: Decode machine check reason. */ + return handled; +} -- cgit v1.2.3 From 36df96f8acaf51992177645eb2d781f766ce97dc Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:40 +0530 Subject: powerpc/book3s: Decode and save machine check event. Now that we handle machine check in linux, the MCE decoding should also take place in linux host. This info is crucial to log before we go down in case we can not handle the machine check errors. This patch decodes and populates a machine check event which contain high level meaning full MCE information. We do this in real mode C code with ME bit on. The MCE information is still available on emergency stack (in pt_regs structure format). Even if we take another exception at this point the MCE early handler will allocate a new stack frame on top of current one. So when we return back here we still have our MCE information safe on current stack. We use per cpu buffer to save high level MCE information. Each per cpu buffer is an array of machine check event structure indexed by per cpu counter mce_nest_count. The mce_nest_count is incremented every time we enter machine check early handler in real mode to get the current free slot (index = mce_nest_count - 1). The mce_nest_count is decremented once the MCE info is consumed by virtual mode machine exception handler. This patch provides save_mce_event(), get_mce_event() and release_mce_event() generic routines that can be used by machine check handlers to populate and retrieve the event. The routine release_mce_event() will free the event slot so that it can be reused. Caller can invoke get_mce_event() with a release flag either to release the event slot immediately OR keep it so that it can be fetched again. The event slot can be also released anytime by invoking release_mce_event(). This patch also updates kvm code to invoke get_mce_event to retrieve generic mce event rather than paca->opal_mce_evt. The KVM code always calls get_mce_event() with release flags set to false so that event is available for linus host machine If machine check occurs while we are in guest, KVM tries to handle the error. If KVM is able to handle MC error successfully, it enters the guest and delivers the machine check to guest. If KVM is not able to handle MC error, it exists the guest and passes the control to linux host machine check handler which then logs MC event and decides how to handle it in linux host. In failure case, KVM needs to make sure that the MC event is available for linux host to consume. Hence KVM always calls get_mce_event() with release flags set to false and later it invokes release_mce_event() only if it succeeds to handle error. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 124 +++++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/mce.c | 164 ++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/mce_power.c | 116 ++++++++++++++++++++++-- arch/powerpc/kvm/book3s_hv_ras.c | 32 ++++--- arch/powerpc/platforms/powernv/opal.c | 35 ++++---- 6 files changed, 434 insertions(+), 39 deletions(-) create mode 100644 arch/powerpc/kernel/mce.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e3ffa825b970..87cad2a808c2 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,5 +66,129 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) +enum MCE_Version { + MCE_V1 = 1, +}; + +enum MCE_Severity { + MCE_SEV_NO_ERROR = 0, + MCE_SEV_WARNING = 1, + MCE_SEV_ERROR_SYNC = 2, + MCE_SEV_FATAL = 3, +}; + +enum MCE_Disposition { + MCE_DISPOSITION_RECOVERED = 0, + MCE_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum MCE_Initiator { + MCE_INITIATOR_UNKNOWN = 0, + MCE_INITIATOR_CPU = 1, +}; + +enum MCE_ErrorType { + MCE_ERROR_TYPE_UNKNOWN = 0, + MCE_ERROR_TYPE_UE = 1, + MCE_ERROR_TYPE_SLB = 2, + MCE_ERROR_TYPE_ERAT = 3, + MCE_ERROR_TYPE_TLB = 4, +}; + +enum MCE_UeErrorType { + MCE_UE_ERROR_INDETERMINATE = 0, + MCE_UE_ERROR_IFETCH = 1, + MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_UE_ERROR_LOAD_STORE = 3, + MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, +}; + +enum MCE_SlbErrorType { + MCE_SLB_ERROR_INDETERMINATE = 0, + MCE_SLB_ERROR_PARITY = 1, + MCE_SLB_ERROR_MULTIHIT = 2, +}; + +enum MCE_EratErrorType { + MCE_ERAT_ERROR_INDETERMINATE = 0, + MCE_ERAT_ERROR_PARITY = 1, + MCE_ERAT_ERROR_MULTIHIT = 2, +}; + +enum MCE_TlbErrorType { + MCE_TLB_ERROR_INDETERMINATE = 0, + MCE_TLB_ERROR_PARITY = 1, + MCE_TLB_ERROR_MULTIHIT = 2, +}; + +struct machine_check_event { + enum MCE_Version version:8; /* 0x00 */ + uint8_t in_use; /* 0x01 */ + enum MCE_Severity severity:8; /* 0x02 */ + enum MCE_Initiator initiator:8; /* 0x03 */ + enum MCE_ErrorType error_type:8; /* 0x04 */ + enum MCE_Disposition disposition:8; /* 0x05 */ + uint8_t reserved_1[2]; /* 0x06 */ + uint64_t gpr3; /* 0x08 */ + uint64_t srr0; /* 0x10 */ + uint64_t srr1; /* 0x18 */ + union { /* 0x20 */ + struct { + enum MCE_UeErrorType ue_error_type:8; + uint8_t effective_address_provided; + uint8_t physical_address_provided; + uint8_t reserved_1[5]; + uint64_t effective_address; + uint64_t physical_address; + uint8_t reserved_2[8]; + } ue_error; + + struct { + enum MCE_SlbErrorType slb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } slb_error; + + struct { + enum MCE_EratErrorType erat_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } erat_error; + + struct { + enum MCE_TlbErrorType tlb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } tlb_error; + } u; +}; + +struct mce_error_info { + enum MCE_ErrorType error_type:8; + union { + enum MCE_UeErrorType ue_error_type:8; + enum MCE_SlbErrorType slb_error_type:8; + enum MCE_EratErrorType erat_error_type:8; + enum MCE_TlbErrorType tlb_error_type:8; + } u; + uint8_t reserved[2]; +}; + +#define MAX_MC_EVT 100 + +/* Release flags for get_mce_event() */ +#define MCE_EVENT_RELEASE true +#define MCE_EVENT_DONTRELEASE false + +extern void save_mce_event(struct pt_regs *regs, long handled, + struct mce_error_info *mce_err, uint64_t addr); +extern int get_mce_event(struct machine_check_event *mce, bool release); +extern void release_mce_event(void); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 07c63d0aa759..904d713366ff 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -39,7 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c new file mode 100644 index 000000000000..aeecdf1ba897 --- /dev/null +++ b/arch/powerpc/kernel/mce.c @@ -0,0 +1,164 @@ +/* + * Machine check exception handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG +#define pr_fmt(fmt) "mce: " fmt + +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(int, mce_nest_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); + +static void mce_set_error_info(struct machine_check_event *mce, + struct mce_error_info *mce_err) +{ + mce->error_type = mce_err->error_type; + switch (mce_err->error_type) { + case MCE_ERROR_TYPE_UE: + mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type; + break; + case MCE_ERROR_TYPE_SLB: + mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type; + break; + case MCE_ERROR_TYPE_ERAT: + mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type; + break; + case MCE_ERROR_TYPE_TLB: + mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; + break; + case MCE_ERROR_TYPE_UNKNOWN: + default: + break; + } +} + +/* + * Decode and save high level MCE information into per cpu buffer which + * is an array of machine_check_event structure. + */ +void save_mce_event(struct pt_regs *regs, long handled, + struct mce_error_info *mce_err, + uint64_t addr) +{ + uint64_t srr1; + int index = __get_cpu_var(mce_nest_count)++; + struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + + /* + * Return if we don't have enough space to log mce event. + * mce_nest_count may go beyond MAX_MC_EVT but that's ok, + * the check below will stop buffer overrun. + */ + if (index >= MAX_MC_EVT) + return; + + /* Populate generic machine check info */ + mce->version = MCE_V1; + mce->srr0 = regs->nip; + mce->srr1 = regs->msr; + mce->gpr3 = regs->gpr[3]; + mce->in_use = 1; + + mce->initiator = MCE_INITIATOR_CPU; + if (handled) + mce->disposition = MCE_DISPOSITION_RECOVERED; + else + mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; + mce->severity = MCE_SEV_ERROR_SYNC; + + srr1 = regs->msr; + + /* + * Populate the mce error_type and type-specific error_type. + */ + mce_set_error_info(mce, mce_err); + + if (!addr) + return; + + if (mce->error_type == MCE_ERROR_TYPE_TLB) { + mce->u.tlb_error.effective_address_provided = true; + mce->u.tlb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { + mce->u.slb_error.effective_address_provided = true; + mce->u.slb_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { + mce->u.erat_error.effective_address_provided = true; + mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_UE) { + mce->u.ue_error.effective_address_provided = true; + mce->u.ue_error.effective_address = addr; + } + return; +} + +/* + * get_mce_event: + * mce Pointer to machine_check_event structure to be filled. + * release Flag to indicate whether to free the event slot or not. + * 0 <= do not release the mce event. Caller will invoke + * release_mce_event() once event has been consumed. + * 1 <= release the slot. + * + * return 1 = success + * 0 = failure + * + * get_mce_event() will be called by platform specific machine check + * handle routine and in KVM. + * When we call get_mce_event(), we are still in interrupt context and + * preemption will not be scheduled until ret_from_expect() routine + * is called. + */ +int get_mce_event(struct machine_check_event *mce, bool release) +{ + int index = __get_cpu_var(mce_nest_count) - 1; + struct machine_check_event *mc_evt; + int ret = 0; + + /* Sanity check */ + if (index < 0) + return ret; + + /* Check if we have MCE info to process. */ + if (index < MAX_MC_EVT) { + mc_evt = &__get_cpu_var(mce_event[index]); + /* Copy the event structure and release the original */ + if (mce) + *mce = *mc_evt; + if (release) + mc_evt->in_use = 0; + ret = 1; + } + /* Decrement the count to free the slot. */ + if (release) + __get_cpu_var(mce_nest_count)--; + + return ret; +} + +void release_mce_event(void) +{ + get_mce_event(NULL, true); +} diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 60a217f11275..b36e777a734f 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -133,22 +133,116 @@ static long mce_handle_ierror_p7(uint64_t srr1) return handled; } +static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1) +{ + switch (P7_SRR1_MC_IFETCH(srr1)) { + case P7_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P7_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P7_SRR1_MC_IFETCH_UE: + case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + } +} + +static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + +static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr) +{ + if (dsisr & P7_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = + MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + } +} + long __machine_check_early_realmode_p7(struct pt_regs *regs) { - uint64_t srr1; + uint64_t srr1, addr; long handled = 1; + struct mce_error_info mce_error_info = { 0 }; srr1 = regs->msr; - if (P7_SRR1_MC_LOADSTORE(srr1)) + /* + * Handle memory errors depending whether this was a load/store or + * ifetch exception. Also, populate the mce error_type and + * type-specific error_type from either SRR1 or DSISR, depending + * whether this was a load/store or ifetch exception + */ + if (P7_SRR1_MC_LOADSTORE(srr1)) { handled = mce_handle_derror_p7(regs->dsisr); - else + mce_get_derror_p7(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { handled = mce_handle_ierror_p7(srr1); + mce_get_ierror_p7(&mce_error_info, srr1); + addr = regs->nip; + } - /* TODO: Decode machine check reason. */ + save_mce_event(regs, handled, &mce_error_info, addr); return handled; } +static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1) +{ + mce_get_common_ierror(mce_err, srr1); + if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + +static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr) +{ + mce_get_derror_p7(mce_err, dsisr); + if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } +} + static long mce_handle_ierror_p8(uint64_t srr1) { long handled = 0; @@ -169,16 +263,22 @@ static long mce_handle_derror_p8(uint64_t dsisr) long __machine_check_early_realmode_p8(struct pt_regs *regs) { - uint64_t srr1; + uint64_t srr1, addr; long handled = 1; + struct mce_error_info mce_error_info = { 0 }; srr1 = regs->msr; - if (P7_SRR1_MC_LOADSTORE(srr1)) + if (P7_SRR1_MC_LOADSTORE(srr1)) { handled = mce_handle_derror_p8(regs->dsisr); - else + mce_get_derror_p8(&mce_error_info, regs->dsisr); + addr = regs->dar; + } else { handled = mce_handle_ierror_p8(srr1); + mce_get_ierror_p8(&mce_error_info, srr1); + addr = regs->nip; + } - /* TODO: Decode machine check reason. */ + save_mce_event(regs, handled, &mce_error_info, addr); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 5c427b41a2f5..768a9f977c00 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -12,6 +12,7 @@ #include #include #include +#include /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) @@ -67,9 +68,7 @@ static void reload_slb(struct kvm_vcpu *vcpu) static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) { unsigned long srr1 = vcpu->arch.shregs.msr; -#ifdef CONFIG_PPC_POWERNV - struct opal_machine_check_event *opal_evt; -#endif + struct machine_check_event mce_evt; long handled = 1; if (srr1 & SRR1_MC_LDSTERR) { @@ -109,22 +108,31 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) handled = 0; } -#ifdef CONFIG_PPC_POWERNV /* - * See if OPAL has already handled the condition. - * We assume that if the condition is recovered then OPAL + * See if we have already handled the condition in the linux host. + * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. + * Don't release mce event now. In case if condition is not + * recovered we do guest exit and go back to linux host machine + * check handler. Hence we need make sure that current mce event + * is available for linux host to consume. */ - opal_evt = local_paca->opal_mc_evt; - if (opal_evt->version == OpalMCE_V1 && - (opal_evt->severity == OpalMCE_SEV_NO_ERROR || - opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED)) + if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) + goto out; + + if (mce_evt.version == MCE_V1 && + (mce_evt.severity == MCE_SEV_NO_ERROR || + mce_evt.disposition == MCE_DISPOSITION_RECOVERED)) handled = 1; +out: + /* + * If we have handled the error, then release the mce event because + * we will be delivering machine check to guest. + */ if (handled) - opal_evt->in_use = 0; -#endif + release_mce_event(); return handled; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 1c798cd55372..c5e71d773f47 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "powernv.h" @@ -256,8 +257,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) int opal_machine_check(struct pt_regs *regs) { - struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt; - struct opal_machine_check_event evt; + struct machine_check_event evt; const char *level, *sevstr, *subtype; static const char *opal_mc_ue_types[] = { "Indeterminate", @@ -282,30 +282,29 @@ int opal_machine_check(struct pt_regs *regs) "Multihit", }; - /* Copy the event structure and release the original */ - evt = *opal_evt; - opal_evt->in_use = 0; + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return 0; /* Print things out */ - if (evt.version != OpalMCE_V1) { + if (evt.version != MCE_V1) { pr_err("Machine Check Exception, Unknown event version %d !\n", evt.version); return 0; } switch(evt.severity) { - case OpalMCE_SEV_NO_ERROR: + case MCE_SEV_NO_ERROR: level = KERN_INFO; sevstr = "Harmless"; break; - case OpalMCE_SEV_WARNING: + case MCE_SEV_WARNING: level = KERN_WARNING; sevstr = ""; break; - case OpalMCE_SEV_ERROR_SYNC: + case MCE_SEV_ERROR_SYNC: level = KERN_ERR; sevstr = "Severe"; break; - case OpalMCE_SEV_FATAL: + case MCE_SEV_FATAL: default: level = KERN_ERR; sevstr = "Fatal"; @@ -313,12 +312,12 @@ int opal_machine_check(struct pt_regs *regs) } printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt.disposition == OpalMCE_DISPOSITION_RECOVERED ? + evt.disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "[Not recovered"); printk("%s Initiator: %s\n", level, - evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown"); + evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch(evt.error_type) { - case OpalMCE_ERROR_TYPE_UE: + case MCE_ERROR_TYPE_UE: subtype = evt.u.ue_error.ue_error_type < ARRAY_SIZE(opal_mc_ue_types) ? opal_mc_ue_types[evt.u.ue_error.ue_error_type] @@ -331,7 +330,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Physial address: %016llx\n", level, evt.u.ue_error.physical_address); break; - case OpalMCE_ERROR_TYPE_SLB: + case MCE_ERROR_TYPE_SLB: subtype = evt.u.slb_error.slb_error_type < ARRAY_SIZE(opal_mc_slb_types) ? opal_mc_slb_types[evt.u.slb_error.slb_error_type] @@ -341,7 +340,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Effective address: %016llx\n", level, evt.u.slb_error.effective_address); break; - case OpalMCE_ERROR_TYPE_ERAT: + case MCE_ERROR_TYPE_ERAT: subtype = evt.u.erat_error.erat_error_type < ARRAY_SIZE(opal_mc_erat_types) ? opal_mc_erat_types[evt.u.erat_error.erat_error_type] @@ -351,7 +350,7 @@ int opal_machine_check(struct pt_regs *regs) printk("%s Effective address: %016llx\n", level, evt.u.erat_error.effective_address); break; - case OpalMCE_ERROR_TYPE_TLB: + case MCE_ERROR_TYPE_TLB: subtype = evt.u.tlb_error.tlb_error_type < ARRAY_SIZE(opal_mc_tlb_types) ? opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type] @@ -362,11 +361,11 @@ int opal_machine_check(struct pt_regs *regs) level, evt.u.tlb_error.effective_address); break; default: - case OpalMCE_ERROR_TYPE_UNKNOWN: + case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); break; } - return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1; + return evt.severity == MCE_SEV_FATAL ? 0 : 1; } static irqreturn_t opal_interrupt(int irq, void *data) -- cgit v1.2.3 From b5ff4211a8294be2ddbaf963fa3666fa042292a8 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:05:49 +0530 Subject: powerpc/book3s: Queue up and process delayed MCE events. When machine check real mode handler can not continue into host kernel in V mode, it returns from the interrupt and we loose MCE event which never gets logged. In such a situation queue up the MCE event so that we can log it later when we get back into host kernel with r1 pointing to kernel stack e.g. during syscall exit. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 3 + arch/powerpc/kernel/entry_64.S | 5 ++ arch/powerpc/kernel/exceptions-64s.S | 7 +- arch/powerpc/kernel/mce.c | 154 ++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 97 +-------------------- 5 files changed, 168 insertions(+), 98 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 87cad2a808c2..3276b409299c 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -190,5 +190,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t addr); extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); +extern void machine_check_queue_event(void); +extern void machine_check_process_queued_event(void); +extern void machine_check_print_event_info(struct machine_check_event *evt); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bbfb0294b354..770d6d65c47b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -183,6 +183,11 @@ syscall_exit: #ifdef SHOW_SYSCALLS bl .do_show_syscall_exit ld r3,RESULT(r1) +#endif +#ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_FTR_SECTION + bl .machine_check_process_queued_event +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif CURRENT_THREAD_INFO(r12, r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1aec3025eeee..862b9dd4a9db 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -855,7 +855,8 @@ BEGIN_FTR_SECTION /* Supervisor state loss */ li r0,1 stb r0,PACA_NAPSTATELOST(r13) -3: MACHINE_CHECK_HANDLER_WINDUP +3: bl .machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) b .power7_enter_nap_mode @@ -895,8 +896,10 @@ BEGIN_FTR_SECTION 2: /* * Return from MC interrupt. - * TODO: Queue up the MCE event so that we can log it later. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. */ + bl .machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP rfid 9: diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index aeecdf1ba897..1c6d15701c56 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -31,6 +31,10 @@ static DEFINE_PER_CPU(int, mce_nest_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); +/* Queue for delayed MCE events. */ +static DEFINE_PER_CPU(int, mce_queue_count); +static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -162,3 +166,153 @@ void release_mce_event(void) { get_mce_event(NULL, true); } + +/* + * Queue up the MCE event which then can be handled later. + */ +void machine_check_queue_event(void) +{ + int index; + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return; + + index = __get_cpu_var(mce_queue_count)++; + /* If queue is full, just return for now. */ + if (index >= MAX_MC_EVT) { + __get_cpu_var(mce_queue_count)--; + return; + } + __get_cpu_var(mce_event_queue[index]) = evt; +} + +/* + * process pending MCE event from the mce event queue. This function will be + * called during syscall exit. + */ +void machine_check_process_queued_event(void) +{ + int index; + + preempt_disable(); + /* + * For now just print it to console. + * TODO: log this error event to FSP or nvram. + */ + while (__get_cpu_var(mce_queue_count) > 0) { + index = __get_cpu_var(mce_queue_count) - 1; + machine_check_print_event_info( + &__get_cpu_var(mce_event_queue[index])); + __get_cpu_var(mce_queue_count)--; + } + preempt_enable(); +} + +void machine_check_print_event_info(struct machine_check_event *evt) +{ + const char *level, *sevstr, *subtype; + static const char *mc_ue_types[] = { + "Indeterminate", + "Instruction fetch", + "Page table walk ifetch", + "Load/Store", + "Page table walk Load/Store", + }; + static const char *mc_slb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_erat_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + static const char *mc_tlb_types[] = { + "Indeterminate", + "Parity", + "Multihit", + }; + + /* Print things out */ + if (evt->version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt->version); + return; + } + switch (evt->severity) { + case MCE_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case MCE_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case MCE_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case MCE_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + printk("%s%s Machine check interrupt [%s]\n", level, sevstr, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "[Not recovered"); + printk("%s Initiator: %s\n", level, + evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + subtype = evt->u.ue_error.ue_error_type < + ARRAY_SIZE(mc_ue_types) ? + mc_ue_types[evt->u.ue_error.ue_error_type] + : "Unknown"; + printk("%s Error type: UE [%s]\n", level, subtype); + if (evt->u.ue_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ue_error.effective_address); + if (evt->u.ue_error.physical_address_provided) + printk("%s Physial address: %016llx\n", + level, evt->u.ue_error.physical_address); + break; + case MCE_ERROR_TYPE_SLB: + subtype = evt->u.slb_error.slb_error_type < + ARRAY_SIZE(mc_slb_types) ? + mc_slb_types[evt->u.slb_error.slb_error_type] + : "Unknown"; + printk("%s Error type: SLB [%s]\n", level, subtype); + if (evt->u.slb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.slb_error.effective_address); + break; + case MCE_ERROR_TYPE_ERAT: + subtype = evt->u.erat_error.erat_error_type < + ARRAY_SIZE(mc_erat_types) ? + mc_erat_types[evt->u.erat_error.erat_error_type] + : "Unknown"; + printk("%s Error type: ERAT [%s]\n", level, subtype); + if (evt->u.erat_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.erat_error.effective_address); + break; + case MCE_ERROR_TYPE_TLB: + subtype = evt->u.tlb_error.tlb_error_type < + ARRAY_SIZE(mc_tlb_types) ? + mc_tlb_types[evt->u.tlb_error.tlb_error_type] + : "Unknown"; + printk("%s Error type: TLB [%s]\n", level, subtype); + if (evt->u.tlb_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.tlb_error.effective_address); + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + printk("%s Error type: Unknown\n", level); + break; + } +} diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c5e71d773f47..245096f90437 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -258,29 +258,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; - const char *level, *sevstr, *subtype; - static const char *opal_mc_ue_types[] = { - "Indeterminate", - "Instruction fetch", - "Page table walk ifetch", - "Load/Store", - "Page table walk Load/Store", - }; - static const char *opal_mc_slb_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; - static const char *opal_mc_erat_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; - static const char *opal_mc_tlb_types[] = { - "Indeterminate", - "Parity", - "Multihit", - }; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return 0; @@ -291,80 +268,8 @@ int opal_machine_check(struct pt_regs *regs) evt.version); return 0; } - switch(evt.severity) { - case MCE_SEV_NO_ERROR: - level = KERN_INFO; - sevstr = "Harmless"; - break; - case MCE_SEV_WARNING: - level = KERN_WARNING; - sevstr = ""; - break; - case MCE_SEV_ERROR_SYNC: - level = KERN_ERR; - sevstr = "Severe"; - break; - case MCE_SEV_FATAL: - default: - level = KERN_ERR; - sevstr = "Fatal"; - break; - } + machine_check_print_event_info(&evt); - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt.disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "[Not recovered"); - printk("%s Initiator: %s\n", level, - evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); - switch(evt.error_type) { - case MCE_ERROR_TYPE_UE: - subtype = evt.u.ue_error.ue_error_type < - ARRAY_SIZE(opal_mc_ue_types) ? - opal_mc_ue_types[evt.u.ue_error.ue_error_type] - : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); - if (evt.u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.ue_error.effective_address); - if (evt.u.ue_error.physical_address_provided) - printk("%s Physial address: %016llx\n", - level, evt.u.ue_error.physical_address); - break; - case MCE_ERROR_TYPE_SLB: - subtype = evt.u.slb_error.slb_error_type < - ARRAY_SIZE(opal_mc_slb_types) ? - opal_mc_slb_types[evt.u.slb_error.slb_error_type] - : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); - if (evt.u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.slb_error.effective_address); - break; - case MCE_ERROR_TYPE_ERAT: - subtype = evt.u.erat_error.erat_error_type < - ARRAY_SIZE(opal_mc_erat_types) ? - opal_mc_erat_types[evt.u.erat_error.erat_error_type] - : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); - if (evt.u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.erat_error.effective_address); - break; - case MCE_ERROR_TYPE_TLB: - subtype = evt.u.tlb_error.tlb_error_type < - ARRAY_SIZE(opal_mc_tlb_types) ? - opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type] - : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); - if (evt.u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt.u.tlb_error.effective_address); - break; - default: - case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); - break; - } return evt.severity == MCE_SEV_FATAL ? 0 : 1; } -- cgit v1.2.3 From b63a0ffe35de7e5f9b907bbc2c783e702f7e15af Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Wed, 30 Oct 2013 20:06:13 +0530 Subject: powerpc/powernv: Machine check exception handling. Add basic error handling in machine check exception handler. - If MSR_RI isn't set, we can not recover. - Check if disposition set to OpalMCE_DISPOSITION_RECOVERED. - Check if address at fault is inside kernel address space, if not then send SIGBUS to process if we hit exception when in userspace. - If address at fault is not provided then and if we get a synchronous machine check while in userspace then kill the task. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 1 + arch/powerpc/kernel/mce.c | 27 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 43 ++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 3276b409299c..a2b8c7b35fba 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -193,5 +193,6 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_process_queued_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt); +extern uint64_t get_mce_fault_addr(struct machine_check_event *evt); #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 1c6d15701c56..c0c52ec1fca7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -316,3 +316,30 @@ void machine_check_print_event_info(struct machine_check_event *evt) break; } } + +uint64_t get_mce_fault_addr(struct machine_check_event *evt) +{ + switch (evt->error_type) { + case MCE_ERROR_TYPE_UE: + if (evt->u.ue_error.effective_address_provided) + return evt->u.ue_error.effective_address; + break; + case MCE_ERROR_TYPE_SLB: + if (evt->u.slb_error.effective_address_provided) + return evt->u.slb_error.effective_address; + break; + case MCE_ERROR_TYPE_ERAT: + if (evt->u.erat_error.effective_address_provided) + return evt->u.erat_error.effective_address; + break; + case MCE_ERROR_TYPE_TLB: + if (evt->u.tlb_error.effective_address_provided) + return evt->u.tlb_error.effective_address; + break; + default: + case MCE_ERROR_TYPE_UNKNOWN: + break; + } + return 0; +} +EXPORT_SYMBOL(get_mce_fault_addr); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index f348bd49487c..01e74cbc67e9 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -251,6 +252,44 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) return written; } +static int opal_recover_mce(struct pt_regs *regs, + struct machine_check_event *evt) +{ + int recovered = 0; + uint64_t ea = get_mce_fault_addr(evt); + + if (!(regs->msr & MSR_RI)) { + /* If MSR_RI isn't set, we cannot recover */ + recovered = 0; + } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { + /* Platform corrected itself */ + recovered = 1; + } else if (ea && !is_kernel_addr(ea)) { + /* + * Faulting address is not in kernel text. We should be fine. + * We need to find which process uses this address. + * For now, kill the task if we have received exception when + * in userspace. + * + * TODO: Queue up this address for hwpoisioning later. + */ + if (user_mode(regs) && !is_global_init(current)) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else + recovered = 0; + } else if (user_mode(regs) && !is_global_init(current) && + evt->severity == MCE_SEV_ERROR_SYNC) { + /* + * If we have received a synchronous error when in userspace + * kill the task. + */ + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } + return recovered; +} + int opal_machine_check(struct pt_regs *regs) { struct machine_check_event evt; @@ -266,7 +305,9 @@ int opal_machine_check(struct pt_regs *regs) } machine_check_print_event_info(&evt); - return evt.severity == MCE_SEV_FATAL ? 0 : 1; + if (opal_recover_mce(regs, &evt)) + return 1; + return 0; } static irqreturn_t opal_interrupt(int irq, void *data) -- cgit v1.2.3 From 24366360035a9e0a9870ed7208aa2ba1948f844d Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 18 Nov 2013 15:35:58 +0530 Subject: powerpc/powernv: Infrastructure to read opal messages in generic format. Opal now has a new messaging infrastructure to push the messages to linux in a generic format for different type of messages using only one event bit. The format of the opal message is as below: struct opal_msg { uint32_t msg_type; uint32_t reserved; uint64_t params[8]; }; This patch allows clients to subscribe for notification for specific message type. It is upto the subscriber to decipher the messages who showed interested in receiving specific message type. The interface to subscribe for notification is: int opal_message_notifier_register(enum OpalMessageType msg_type, struct notifier_block *nb) The notifier will fetch the opal message when available and notify the subscriber with message type and the opal message. It is subscribers responsibility to copy the message data before returning from notifier callback. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 24 ++++++- arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/opal.c | 90 ++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 033c06be1d84..ffb2036fcfb2 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -132,6 +132,8 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_FLASH_VALIDATE 76 #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 +#define OPAL_GET_MSG 85 +#define OPAL_CHECK_ASYNC_COMPLETION 86 #ifndef __ASSEMBLY__ @@ -211,7 +213,16 @@ enum OpalPendingState { OPAL_EVENT_ERROR_LOG = 0x40, OPAL_EVENT_EPOW = 0x80, OPAL_EVENT_LED_STATUS = 0x100, - OPAL_EVENT_PCI_ERROR = 0x200 + OPAL_EVENT_PCI_ERROR = 0x200, + OPAL_EVENT_MSG_PENDING = 0x800, +}; + +enum OpalMessageType { + OPAL_MSG_ASYNC_COMP = 0, + OPAL_MSG_MEM_ERR, + OPAL_MSG_EPOW, + OPAL_MSG_SHUTDOWN, + OPAL_MSG_TYPE_MAX, }; /* Machine check related definitions */ @@ -356,6 +367,12 @@ enum OpalLPCAddressType { OPAL_LPC_FW = 2, }; +struct opal_msg { + uint32_t msg_type; + uint32_t reserved; + uint64_t params[8]; +}; + struct opal_machine_check_event { enum OpalMCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -731,6 +748,9 @@ int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result); int64_t opal_manage_flash(uint8_t op); int64_t opal_update_flash(uint64_t blk_list); +int64_t opal_get_msg(uint64_t buffer, size_t size); +int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -744,6 +764,8 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); extern int opal_notifier_register(struct notifier_block *nb); +extern int opal_message_notifier_register(enum OpalMessageType msg_type, + struct notifier_block *nb); extern void opal_notifier_enable(void); extern void opal_notifier_disable(void); extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index e7806504e976..719aa5c325c6 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -126,3 +126,5 @@ OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); +OPAL_CALL(opal_get_msg, OPAL_GET_MSG); +OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 01e74cbc67e9..7a184a0ff183 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -40,6 +40,7 @@ extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); +static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static DEFINE_SPINLOCK(opal_notifier_lock); static uint64_t last_notified_mask = 0x0ul; static atomic_t opal_notifier_hold = ATOMIC_INIT(0); @@ -167,6 +168,95 @@ void opal_notifier_disable(void) atomic_set(&opal_notifier_hold, 1); } +/* + * Opal message notifier based on message type. Allow subscribers to get + * notified for specific messgae type. + */ +int opal_message_notifier_register(enum OpalMessageType msg_type, + struct notifier_block *nb) +{ + if (!nb) { + pr_warning("%s: Invalid argument (%p)\n", + __func__, nb); + return -EINVAL; + } + if (msg_type > OPAL_MSG_TYPE_MAX) { + pr_warning("%s: Invalid message type argument (%d)\n", + __func__, msg_type); + return -EINVAL; + } + return atomic_notifier_chain_register( + &opal_msg_notifier_head[msg_type], nb); +} + +static void opal_message_do_notify(uint32_t msg_type, void *msg) +{ + /* notify subscribers */ + atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], + msg_type, msg); +} + +static void opal_handle_message(void) +{ + s64 ret; + /* + * TODO: pre-allocate a message buffer depending on opal-msg-size + * value in /proc/device-tree. + */ + static struct opal_msg msg; + + ret = opal_get_msg(__pa(&msg), sizeof(msg)); + /* No opal message pending. */ + if (ret == OPAL_RESOURCE) + return; + + /* check for errors. */ + if (ret) { + pr_warning("%s: Failed to retrive opal message, err=%lld\n", + __func__, ret); + return; + } + + /* Sanity check */ + if (msg.msg_type > OPAL_MSG_TYPE_MAX) { + pr_warning("%s: Unknown message type: %u\n", + __func__, msg.msg_type); + return; + } + opal_message_do_notify(msg.msg_type, (void *)&msg); +} + +static int opal_message_notify(struct notifier_block *nb, + unsigned long events, void *change) +{ + if (events & OPAL_EVENT_MSG_PENDING) + opal_handle_message(); + return 0; +} + +static struct notifier_block opal_message_nb = { + .notifier_call = opal_message_notify, + .next = NULL, + .priority = 0, +}; + +static int __init opal_message_init(void) +{ + int ret, i; + + for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); + + ret = opal_notifier_register(&opal_message_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + return 0; +} +early_initcall(opal_message_init); + int opal_get_chars(uint32_t vtermno, char *buf, int count) { s64 rc; -- cgit v1.2.3 From 7e1ce5a492e18449fd47ef6305b26e0c572d26e9 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Mon, 18 Nov 2013 16:39:22 +0530 Subject: powerpc/powernv: Move SG list structure to header file Move SG list and entry structure to header file so that it can be used in other places as well. Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 22 ++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-flash.c | 31 ++++++----------------------- 2 files changed, 28 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ffb2036fcfb2..0a2ac85998d7 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -33,6 +33,28 @@ struct opal_takeover_args { u64 rd_loc; /* r11 */ }; +/* + * SG entry + * + * WARNING: The current implementation requires each entry + * to represent a block that is 4k aligned *and* each block + * size except the last one in the list to be as well. + */ +struct opal_sg_entry { + void *data; + long length; +}; + +/* sg list */ +struct opal_sg_list { + unsigned long num_entries; + struct opal_sg_list *next; + struct opal_sg_entry entry[]; +}; + +/* We calculate number of sg entries based on PAGE_SIZE */ +#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) + extern long opal_query_takeover(u64 *hal_size, u64 *hal_align); extern long opal_do_takeover(struct opal_takeover_args *args); diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 6ffa6b1ec5b7..4aeae4f36e1d 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -103,27 +103,6 @@ struct image_header_t { uint32_t size; }; -/* Scatter/gather entry */ -struct opal_sg_entry { - void *data; - long length; -}; - -/* We calculate number of entries based on PAGE_SIZE */ -#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) - -/* - * This struct is very similar but not identical to that - * needed by the opal flash update. All we need to do for - * opal is rewrite num_entries into a version/length and - * translate the pointers to absolute. - */ -struct opal_sg_list { - unsigned long num_entries; - struct opal_sg_list *next; - struct opal_sg_entry entry[SG_ENTRIES_PER_NODE]; -}; - struct validate_flash_t { int status; /* Return status */ void *buf; /* Candiate image buffer */ @@ -333,7 +312,7 @@ static struct opal_sg_list *image_data_to_sglist(void) addr = image_data.data; size = image_data.size; - sg1 = kzalloc((sizeof(struct opal_sg_list)), GFP_KERNEL); + sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!sg1) return NULL; @@ -351,8 +330,7 @@ static struct opal_sg_list *image_data_to_sglist(void) sg1->num_entries++; if (sg1->num_entries >= SG_ENTRIES_PER_NODE) { - sg1->next = kzalloc((sizeof(struct opal_sg_list)), - GFP_KERNEL); + sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!sg1->next) { pr_err("%s : Failed to allocate memory\n", __func__); @@ -402,7 +380,10 @@ static int opal_flash_update(int op) else sg->next = NULL; - /* Make num_entries into the version/length field */ + /* + * Convert num_entries to version/length format + * to satisfy OPAL. + */ sg->num_entries = (SG_LIST_VERSION << 56) | (sg->num_entries * sizeof(struct opal_sg_entry) + 16); } -- cgit v1.2.3 From d905c5df9aef38d63df268f6f5e7b13894f626d3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 21 Nov 2013 17:43:14 +1100 Subject: PPC: POWERNV: move iommu_add_device earlier The current implementation of IOMMU on sPAPR does not use iommu_ops and therefore does not call IOMMU API's bus_set_iommu() which 1) sets iommu_ops for a bus 2) registers a bus notifier Instead, PCI devices are added to IOMMU groups from subsys_initcall_sync(tce_iommu_init) which does basically the same thing without using iommu_ops callbacks. However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158) implements iommu_ops and when tce_iommu_init is called, every PCI device is already added to some group so there is a conflict. This patch does 2 things: 1. removes the loop in which PCI devices were added to groups and adds explicit iommu_add_device() calls to add devices as soon as they get the iommu_table pointer assigned to them. 2. moves a bus notifier to powernv code in order to avoid conflict with the notifier from Freescale driver. iommu_add_device() and iommu_del_device() are public now. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 26 ++++++++++++++++ arch/powerpc/kernel/iommu.c | 48 +++-------------------------- arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 2 +- arch/powerpc/platforms/powernv/pci.c | 33 +++++++++++++++++++- arch/powerpc/platforms/pseries/iommu.c | 8 +++-- 6 files changed, 72 insertions(+), 53 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index c34656a8925e..774fa2776907 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -101,8 +101,34 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#ifdef CONFIG_IOMMU_API extern void iommu_register_group(struct iommu_table *tbl, int pci_domain_number, unsigned long pe_num); +extern int iommu_add_device(struct device *dev); +extern void iommu_del_device(struct device *dev); +#else +static inline void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, + unsigned long pe_num) +{ +} + +static inline int iommu_add_device(struct device *dev) +{ + return 0; +} + +static inline void iommu_del_device(struct device *dev) +{ +} +#endif /* !CONFIG_IOMMU_API */ + +static inline void set_iommu_table_base_and_group(struct device *dev, + void *base) +{ + set_iommu_table_base(dev, base); + iommu_add_device(dev); +} extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 572bb5b95f35..d22abe0b08e3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl) } EXPORT_SYMBOL_GPL(iommu_release_ownership); -static int iommu_add_device(struct device *dev) +int iommu_add_device(struct device *dev) { struct iommu_table *tbl; int ret = 0; @@ -1134,52 +1134,12 @@ static int iommu_add_device(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(iommu_add_device); -static void iommu_del_device(struct device *dev) +void iommu_del_device(struct device *dev) { iommu_group_remove_device(dev); } - -static int iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - return iommu_add_device(dev); - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = iommu_bus_notifier, -}; - -static int __init tce_iommu_init(void) -{ - struct pci_dev *pdev = NULL; - - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); - - for_each_pci_dev(pdev) - iommu_add_device(&pdev->dev); - - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); - return 0; -} - -subsys_initcall_sync(tce_iommu_init); - -#else - -void iommu_register_group(struct iommu_table *tbl, - int pci_domain_number, unsigned long pe_num) -{ -} +EXPORT_SYMBOL_GPL(iommu_del_device); #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 084cdfa40682..614356cac466 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -460,7 +460,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev return; pe = &phb->ioda.pe_array[pdn->pe_number]; - set_iommu_table_base(&pdev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table); } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) @@ -468,7 +468,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, &pe->tce32_table); + set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table); if (dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); } @@ -644,7 +644,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else pnv_ioda_setup_bus_dma(pe, pe->pbus); @@ -722,7 +722,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); + set_iommu_table_base_and_group(&pe->pdev->dev, tbl); else pnv_ioda_setup_bus_dma(pe, pe->pbus); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index f8b4bd8afb2e..e3807d69393e 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,7 +92,7 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); } static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 4eb33a9ed532..6f3d49c18d64 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -536,7 +536,7 @@ static void pnv_pci_dma_fallback_setup(struct pci_controller *hose, pdn->iommu_table = pnv_pci_setup_bml_iommu(hose); if (!pdn->iommu_table) return; - set_iommu_table_base(&pdev->dev, pdn->iommu_table); + set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table); } static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) @@ -657,3 +657,34 @@ void __init pnv_pci_init(void) ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; #endif } + +static int tce_iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + return iommu_add_device(dev); + case BUS_NOTIFY_DEL_DEVICE: + if (dev->iommu_group) + iommu_del_device(dev); + return 0; + default: + return 0; + } +} + +static struct notifier_block tce_iommu_bus_nb = { + .notifier_call = tce_iommu_bus_notifier, +}; + +static int __init tce_iommu_bus_notifier_init(void) +{ + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + + bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); + return 0; +} + +subsys_initcall_sync(tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index f253361552ae..a80af6c20cba 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -687,7 +687,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); return; } @@ -699,7 +700,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) dn = dn->parent; if (dn && PCI_DN(dn)) - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base_and_group(&dev->dev, + PCI_DN(dn)->iommu_table); else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); @@ -1193,7 +1195,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - set_iommu_table_base(&dev->dev, pci->iommu_table); + set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) -- cgit v1.2.3 From 0150a3dd92fc45b599bf2442b47d40921b4aa2d2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Nov 2013 13:27:18 +1100 Subject: powerpc: Add real mode cache inhibited IO accessors These accessors allow us to do cache inhibited accesses when in real mode. They should only be used in real mode. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/io.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 575fbf81fad0..97d3869991ca 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -191,8 +191,24 @@ DEF_MMIO_OUT_D(out_le32, 32, stw); #endif /* __BIG_ENDIAN */ +/* + * Cache inhibitied accessors for use in real mode, you don't want to use these + * unless you know what you're doing. + * + * NB. These use the cpu byte ordering. + */ +DEF_MMIO_OUT_X(out_rm8, 8, stbcix); +DEF_MMIO_OUT_X(out_rm16, 16, sthcix); +DEF_MMIO_OUT_X(out_rm32, 32, stwcix); +DEF_MMIO_IN_X(in_rm8, 8, lbzcix); +DEF_MMIO_IN_X(in_rm16, 16, lhzcix); +DEF_MMIO_IN_X(in_rm32, 32, lwzcix); + #ifdef __powerpc64__ +DEF_MMIO_OUT_X(out_rm64, 64, stdcix); +DEF_MMIO_IN_X(in_rm64, 64, ldcix); + #ifdef __BIG_ENDIAN__ DEF_MMIO_OUT_D(out_be64, 64, std); DEF_MMIO_IN_D(in_be64, 64, ld); -- cgit v1.2.3 From 1a8f6f97ea4dbaaa21b05cae2dacea47e4aea37b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 5 Dec 2013 11:31:08 +0800 Subject: powerpc: Make slb_shadow a local The only external user of slb_shadow is the pseries lpar code, and it can access through the paca array instead. Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 -- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 844c28de7ec0..d0a2a2f99564 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -132,8 +132,6 @@ struct slb_shadow { } save_area[SLB_NUM_BOLTED]; } ____cacheline_aligned; -extern struct slb_shadow slb_shadow[]; - /* * Layout of entries in the hypervisor's dispatch trace log buffer. */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0620eaaaad45..9095a6f7ac2c 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -99,7 +99,7 @@ static inline void free_lppacas(void) { } * 3 persistent SLBs are registered here. The buffer will be zero * initially, hence will all be invaild until we actually write them. */ -struct slb_shadow slb_shadow[] __cacheline_aligned = { +static struct slb_shadow slb_shadow[] __cacheline_aligned = { [0 ... (NR_CPUS-1)] = { .persistent = cpu_to_be32(SLB_NUM_BOLTED), .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4fca3def9db9..28cf0f33c5be 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -92,7 +92,7 @@ void vpa_init(int cpu) * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. */ - addr = __pa(&slb_shadow[cpu]); + addr = __pa(paca[cpu].slb_shadow_ptr); if (firmware_has_feature(FW_FEATURE_SPLPAR)) { ret = register_slb_shadow(hwcpu, addr); if (ret) -- cgit v1.2.3 From c8c06f5a0dde0fed260c54d550962187f266ed0d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 18 Nov 2013 14:58:10 +0530 Subject: powerpc/mm: Free up _PAGE_COHERENCE for numa fault use later Set memory coherence always on hash64 config. If a platform cannot have memory coherence always set they can infer that from _PAGE_NO_CACHE and _PAGE_WRITETHRU like in lpar. So we dont' really need a separate bit for tracking _PAGE_COHERENCE. Signed-off-by: Aneesh Kumar K.V Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pte-hash64.h | 2 +- arch/powerpc/mm/hash_low_64.S | 15 ++++++++++++--- arch/powerpc/mm/hash_utils_64.c | 7 ++++--- arch/powerpc/mm/hugepage-hash64.c | 6 +++++- arch/powerpc/mm/hugetlbpage-hash64.c | 4 ++++ 5 files changed, 26 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index 0419eeb53274..55aea0caf95e 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -19,7 +19,7 @@ #define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */ #define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ #define _PAGE_GUARDED 0x0008 -#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */ +/* We can derive Memory coherence from _PAGE_NO_CACHE */ #define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */ #define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */ #define _PAGE_DIRTY 0x0080 /* C: page changed */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index d3cbda62857b..1136d26a95ae 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -148,7 +148,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ + /* + * Always add "C" bit for perf. Memory coherence is always enabled + */ + ori r3,r3,HPTE_R_C | HPTE_R_M /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -457,7 +460,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r3,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ + /* + * Always add "C" bit for perf. Memory coherence is always enabled + */ + ori r3,r3,HPTE_R_C | HPTE_R_M /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) @@ -795,7 +801,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ - ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */ + /* + * Always add "C" bit for perf. Memory coherence is always enabled + */ + ori r3,r3,HPTE_R_C | HPTE_R_M /* We eventually do the icache sync here (maybe inline that * code rather than call a C function...) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6176b3cdf579..de6881259aef 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -169,9 +169,10 @@ static unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY))) rflags |= 1; - - /* Always add C */ - return rflags | HPTE_R_C; + /* + * Always add "C" bit for perf. Memory coherence is always enabled + */ + return rflags | HPTE_R_C | HPTE_R_M; } int htab_bolt_mapping(unsigned long vstart, unsigned long vend, diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 34de9e0cdc34..826893fcb3a7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -127,7 +127,11 @@ repeat: /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | - _PAGE_COHERENT | _PAGE_GUARDED)); + _PAGE_GUARDED)); + /* + * enable the memory coherence always + */ + rflags |= HPTE_R_M; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0b7fb6761015..a5bcf9301196 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -99,6 +99,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Add in WIMG bits */ rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED)); + /* + * enable the memory coherence always + */ + rflags |= HPTE_R_M; slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0, mmu_psize, ssize); -- cgit v1.2.3 From c34a51ce49b40b9667cd7f5cc2e40475af8b4c3d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 18 Nov 2013 14:58:13 +0530 Subject: powerpc/mm: Enable _PAGE_NUMA for book3s We steal the _PAGE_COHERENCE bit and use that for indicating NUMA ptes. Signed-off-by: Aneesh Kumar K.V Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable.h | 66 +++++++++++++++++++++++++++++++++- arch/powerpc/include/asm/pte-hash64.h | 6 ++++ arch/powerpc/platforms/Kconfig.cputype | 1 + 3 files changed, 72 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 7d6eacf249cf..b999ca318985 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +#include #include /* For TASK_SIZE */ #include #include @@ -33,10 +34,73 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } -static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } +#ifdef CONFIG_NUMA_BALANCING + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA); +} + +#define pte_numa pte_numa +static inline int pte_numa(pte_t pte) +{ + return (pte_val(pte) & + (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; +} + +#define pte_mknonnuma pte_mknonnuma +static inline pte_t pte_mknonnuma(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_NUMA; + pte_val(pte) |= _PAGE_PRESENT | _PAGE_ACCESSED; + return pte; +} + +#define pte_mknuma pte_mknuma +static inline pte_t pte_mknuma(pte_t pte) +{ + /* + * We should not set _PAGE_NUMA on non present ptes. Also clear the + * present bit so that hash_page will return 1 and we collect this + * as numa fault. + */ + if (pte_present(pte)) { + pte_val(pte) |= _PAGE_NUMA; + pte_val(pte) &= ~_PAGE_PRESENT; + } else + VM_BUG_ON(1); + return pte; +} + +#define pmd_numa pmd_numa +static inline int pmd_numa(pmd_t pmd) +{ + return pte_numa(pmd_pte(pmd)); +} + +#define pmd_mknonnuma pmd_mknonnuma +static inline pmd_t pmd_mknonnuma(pmd_t pmd) +{ + return pte_pmd(pte_mknonnuma(pmd_pte(pmd))); +} + +#define pmd_mknuma pmd_mknuma +static inline pmd_t pmd_mknuma(pmd_t pmd) +{ + return pte_pmd(pte_mknuma(pmd_pte(pmd))); +} + +# else + +static inline int pte_present(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} +#endif /* CONFIG_NUMA_BALANCING */ + /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. * diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index 55aea0caf95e..2505d8eab15c 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -27,6 +27,12 @@ #define _PAGE_RW 0x0200 /* software: user write access allowed */ #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ +/* + * Used for tracking numa faults + */ +#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */ + + /* No separate kernel read-only */ #define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index bca2465a9c34..434fda39bf8b 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -72,6 +72,7 @@ config PPC_BOOK3S_64 select PPC_HAVE_PMU_SUPPORT select SYS_SUPPORTS_HUGETLBFS select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES + select ARCH_SUPPORTS_NUMA_BALANCING config PPC_BOOK3E_64 bool "Embedded processors" -- cgit v1.2.3 From 75eb3d9b60c280a275099fbed06a890cee2d784b Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 15 Nov 2013 09:50:57 +0530 Subject: powerpc/powernv: Get FSP memory errors and plumb into memory poison infrastructure. Get the memory errors reported by opal and plumb it into memory poison infrastructure. This patch uses new messaging channel infrastructure to pull the fsp memory errors to linux. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 52 ++++++++ arch/powerpc/platforms/powernv/Makefile | 1 + .../powerpc/platforms/powernv/opal-memory-errors.c | 146 +++++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 arch/powerpc/platforms/powernv/opal-memory-errors.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0a2ac85998d7..aded1b81bfd6 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -443,6 +443,58 @@ struct opal_machine_check_event { } u; }; +/* FSP memory errors handling */ +enum OpalMemErr_Version { + OpalMemErr_V1 = 1, +}; + +enum OpalMemErrType { + OPAL_MEM_ERR_TYPE_RESILIENCE = 0, + OPAL_MEM_ERR_TYPE_DYN_DALLOC, + OPAL_MEM_ERR_TYPE_SCRUB, +}; + +/* Memory Reilience error type */ +enum OpalMemErr_ResilErrType { + OPAL_MEM_RESILIENCE_CE = 0, + OPAL_MEM_RESILIENCE_UE, + OPAL_MEM_RESILIENCE_UE_SCRUB, +}; + +/* Dynamic Memory Deallocation type */ +enum OpalMemErr_DynErrType { + OPAL_MEM_DYNAMIC_DEALLOC = 0, +}; + +/* OpalMemoryErrorData->flags */ +#define OPAL_MEM_CORRECTED_ERROR 0x0001 +#define OPAL_MEM_THRESHOLD_EXCEEDED 0x0002 +#define OPAL_MEM_ACK_REQUIRED 0x8000 + +struct OpalMemoryErrorData { + enum OpalMemErr_Version version:8; /* 0x00 */ + enum OpalMemErrType type:8; /* 0x01 */ + uint16_t flags; /* 0x02 */ + uint8_t reserved_1[4]; /* 0x04 */ + + union { + /* Memory Resilience corrected/uncorrected error info */ + struct { + enum OpalMemErr_ResilErrType resil_err_type:8; + uint8_t reserved_1[7]; + uint64_t physical_address_start; + uint64_t physical_address_end; + } resilience; + /* Dynamic memory deallocation error info */ + struct { + enum OpalMemErr_DynErrType dyn_err_type:8; + uint8_t reserved_1[7]; + uint64_t physical_address_start; + uint64_t physical_address_end; + } dyn_dealloc; + } u; +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 873fa1370dc4..8d767fde5a6a 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o +obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c new file mode 100644 index 000000000000..ec4132239cdf --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -0,0 +1,146 @@ +/* + * OPAL asynchronus Memory error handling support in PowreNV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include +#include + +#include +#include + +static int opal_mem_err_nb_init; +static LIST_HEAD(opal_memory_err_list); +static DEFINE_SPINLOCK(opal_mem_err_lock); + +struct OpalMsgNode { + struct list_head list; + struct opal_msg msg; +}; + +static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) +{ + uint64_t paddr_start, paddr_end; + + pr_debug("%s: Retrived memory error event, type: 0x%x\n", + __func__, merr_evt->type); + switch (merr_evt->type) { + case OPAL_MEM_ERR_TYPE_RESILIENCE: + paddr_start = merr_evt->u.resilience.physical_address_start; + paddr_end = merr_evt->u.resilience.physical_address_end; + break; + case OPAL_MEM_ERR_TYPE_DYN_DALLOC: + paddr_start = merr_evt->u.dyn_dealloc.physical_address_start; + paddr_end = merr_evt->u.dyn_dealloc.physical_address_end; + break; + default: + return; + } + + for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { + memory_failure(paddr_start >> PAGE_SHIFT, 0, 0); + } +} + +static void handle_memory_error(void) +{ + unsigned long flags; + struct OpalMemoryErrorData *merr_evt; + struct OpalMsgNode *msg_node; + + spin_lock_irqsave(&opal_mem_err_lock, flags); + while (!list_empty(&opal_memory_err_list)) { + msg_node = list_entry(opal_memory_err_list.next, + struct OpalMsgNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_mem_err_lock, flags); + + merr_evt = (struct OpalMemoryErrorData *) + &msg_node->msg.params[0]; + handle_memory_error_event(merr_evt); + kfree(msg_node); + spin_lock_irqsave(&opal_mem_err_lock, flags); + } + spin_unlock_irqrestore(&opal_mem_err_lock, flags); +} + +static void mem_error_handler(struct work_struct *work) +{ + handle_memory_error(); +} + +static DECLARE_WORK(mem_error_work, mem_error_handler); + +/* + * opal_memory_err_event - notifier handler that queues up the opal message + * to be preocessed later. + */ +static int opal_memory_err_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalMsgNode *msg_node; + + if (msg_type != OPAL_MSG_MEM_ERR) + return 0; + + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("MEMORY_ERROR: out of memory, Opal message event not" + "handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); + + spin_lock_irqsave(&opal_mem_err_lock, flags); + list_add(&msg_node->list, &opal_memory_err_list); + spin_unlock_irqrestore(&opal_mem_err_lock, flags); + + schedule_work(&mem_error_work); + return 0; +} + +static struct notifier_block opal_mem_err_nb = { + .notifier_call = opal_memory_err_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_mem_err_init(void) +{ + int ret; + + if (!opal_mem_err_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_MEM_ERR, &opal_mem_err_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_mem_err_nb_init = 1; + } + return 0; +} +subsys_initcall(opal_mem_err_init); -- cgit v1.2.3 From e3fec2f74f7f90d2149a24243a4d040caabe6f30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Dec 2013 21:26:19 -0500 Subject: lib: Add missing arch generic-y entries for asm-generic/hash.h Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 3 ++- arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 3 ++- arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 2 ++ arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + 28 files changed, 31 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index f01fb505ad52..a73a8e208a4a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 5943f7f9d325..93e6ca919620 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -47,3 +47,4 @@ generic-y += user.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index c38b58c80202..3278afe2c3ab 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -34,3 +34,4 @@ generic-y += timex.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 519f89f5b6a3..626d4a92521f 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -51,3 +51,4 @@ generic-y += user.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 658001b52400..cfb9fe1b8df9 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -18,3 +18,4 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += xor.h +generic-y += hash.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index f2b43474b0e2..359d36fdc247 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -45,3 +45,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index fc0b3c356027..d73bb85ccdd3 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -57,3 +57,4 @@ generic-y += user.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index b06caf649a95..c5963b3e4624 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -12,3 +12,4 @@ generic-y += trace_clock.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 74742dc6a3da..bc42f14c9c2e 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 67c3450309b7..469d223950ff 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -54,3 +54,4 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index f93ee087e8fe..283a83154b5e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -4,4 +4,5 @@ generic-y += exec.h generic-y += kvm_para.h generic-y += trace_clock.h generic-y += preempt.h -generic-y += vtime.h \ No newline at end of file +generic-y += vtime.h +generic-y += hash.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 2b58c5f0bc38..932435ac4e5c 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += exec.h generic-y += module.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index a5d27f272a59..7cc8c364924d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -32,3 +32,4 @@ generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 84d0c1d6b9b3..b716d807c2ec 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -53,3 +53,4 @@ generic-y += user.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index ce0bbf8f5640..43eec338ff50 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += exec.h generic-y += trace_clock.h generic-y += syscalls.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 1acbb8b77a71..2d7f65052c1f 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -14,3 +14,4 @@ generic-y += trace_clock.h generic-y += preempt.h generic-y += ucontext.h generic-y += xor.h +generic-y += hash.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 74742dc6a3da..bc42f14c9c2e 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index da1951a22907..2e40f1ca8667 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -69,3 +69,4 @@ generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index a603b9ebe54c..75edd5fcc6ff 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ poll.h xor.h clkdev.h exec.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index d8f9d2f18a23..6c0a955a1b06 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,4 +3,5 @@ generic-y += clkdev.h generic-y += rwsem.h generic-y += trace_clock.h generic-y += preempt.h -generic-y += vtime.h \ No newline at end of file +generic-y += vtime.h +generic-y += hash.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 7a5288f3479a..8386a4a1f19a 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index f3414ade77a3..099e7ba40599 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -5,3 +5,5 @@ generic-y += clkdev.h generic-y += trace_clock.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h + diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 231efbb68108..0cd7198a4524 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -35,3 +35,4 @@ generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index bf390667657a..4b60a0c325ec 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -17,3 +17,4 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 22f3bd147fa7..3793c75e45d9 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -39,3 +39,4 @@ generic-y += trace_clock.h generic-y += types.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index fdde187e6087..75de4abe4f94 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h generic-y += switch_to.h clkdev.h generic-y += trace_clock.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 00045cbe5c63..3ef4f9d9bf5d 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -61,3 +61,4 @@ generic-y += user.h generic-y += vga.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 228d6aee3a16..d7efa1502101 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -29,3 +29,4 @@ generic-y += topology.h generic-y += trace_clock.h generic-y += xor.h generic-y += preempt.h +generic-y += hash.h -- cgit v1.2.3 From e589a4404fa06730355de204d3d136ed9bbc7dea Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:01 +1100 Subject: powerpc/iommu: Update constant names to reflect their hardcoded page size The powerpc iommu uses a hardcoded page size of 4K. This patch changes the name of the IOMMU_PAGE_* macros to reflect the hardcoded values. A future patch will use the existing names to support dynamic page sizes. Signed-off-by: Alistair Popple Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 10 ++--- arch/powerpc/kernel/dma-iommu.c | 4 +- arch/powerpc/kernel/iommu.c | 78 +++++++++++++++++----------------- arch/powerpc/kernel/vio.c | 19 +++++---- arch/powerpc/platforms/cell/iommu.c | 12 +++--- arch/powerpc/platforms/powernv/pci.c | 4 +- arch/powerpc/platforms/pseries/iommu.c | 8 ++-- arch/powerpc/platforms/pseries/setup.c | 4 +- arch/powerpc/platforms/wsp/wsp_pci.c | 10 ++--- drivers/net/ethernet/ibm/ibmveth.c | 9 ++-- drivers/vfio/vfio_iommu_spapr_tce.c | 28 ++++++------ 11 files changed, 94 insertions(+), 92 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 774fa2776907..0869c7e74421 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -30,10 +30,10 @@ #include #include -#define IOMMU_PAGE_SHIFT 12 -#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT) -#define IOMMU_PAGE_MASK (~((1 << IOMMU_PAGE_SHIFT) - 1)) -#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE) +#define IOMMU_PAGE_SHIFT_4K 12 +#define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K) +#define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) +#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) /* Boot time flags */ extern int iommu_is_off; @@ -42,7 +42,7 @@ extern int iommu_force_on; /* Pure 2^n version of get_order */ static __inline__ __attribute_const__ int get_iommu_order(unsigned long size) { - return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1; + return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT_4K) + 1; } diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e4897523de41..5cfe3dbfc4a0 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { + if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT_4K)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset << IOMMU_PAGE_SHIFT); + mask, tbl->it_offset << IOMMU_PAGE_SHIFT_4K); return 0; } else return 1; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index d22abe0b08e3..df4a7f1b7444 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,14 @@ again: if (dev) boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IOMMU_PAGE_SHIFT); + 1 << IOMMU_PAGE_SHIFT_4K); else - boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); + boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT_4K); /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ n = iommu_area_alloc(tbl->it_map, limit, start, npages, - tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, - align_mask); + tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT_4K, + align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ @@ -320,12 +320,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, return DMA_ERROR_CODE; entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ + ret = entry << IOMMU_PAGE_SHIFT_4K; /* Set the return dma address */ /* Put the TCEs in the HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - (unsigned long)page & IOMMU_PAGE_MASK, - direction, attrs); + (unsigned long)page & IOMMU_PAGE_MASK_4K, + direction, attrs); /* ppc_md.tce_build() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return @@ -352,7 +352,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, { unsigned long entry, free_entry; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +401,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; struct iommu_pool *pool; - entry = dma_addr >> IOMMU_PAGE_SHIFT; + entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; free_entry = entry - tbl->it_offset; pool = get_pool(tbl, free_entry); @@ -468,13 +468,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE_4K); align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && + if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; entry = iommu_range_alloc(dev, tbl, npages, &handle, - mask >> IOMMU_PAGE_SHIFT, align); + mask >> IOMMU_PAGE_SHIFT_4K, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +489,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << IOMMU_PAGE_SHIFT; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); + dma_addr = entry << IOMMU_PAGE_SHIFT_4K; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK_4K); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - vaddr & IOMMU_PAGE_MASK, - direction, attrs); + vaddr & IOMMU_PAGE_MASK_4K, + direction, attrs); if(unlikely(build_fail)) goto failure; @@ -559,9 +559,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & IOMMU_PAGE_MASK; + vaddr = s->dma_address & IOMMU_PAGE_MASK_4K; npages = iommu_num_pages(s->dma_address, s->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE_4K); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -592,7 +592,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sg->dma_length == 0) break; npages = iommu_num_pages(dma_handle, sg->dma_length, - IOMMU_PAGE_SIZE); + IOMMU_PAGE_SIZE_4K); __iommu_free(tbl, dma_handle, npages); sg = sg_next(sg); } @@ -676,7 +676,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) set_bit(0, tbl->it_map); /* We only split the IOMMU table if we have 1GB or more of space */ - if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) + if ((tbl->it_size << IOMMU_PAGE_SHIFT_4K) >= (1UL * 1024 * 1024 * 1024)) tbl->nr_pools = IOMMU_NR_POOLS; else tbl->nr_pools = 1; @@ -768,16 +768,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE_4K); if (tbl) { align = 0; - if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && + if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; + align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT, align, + mask >> IOMMU_PAGE_SHIFT_4K, align, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -786,7 +786,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, npages); } } else - dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK_4K); } return dma_handle; @@ -801,7 +801,7 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, BUG_ON(direction == DMA_NONE); if (tbl) { - npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); + npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE_4K); iommu_free(tbl, dma_handle, npages); } } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> IOMMU_PAGE_SHIFT; + nio_pages = size >> IOMMU_PAGE_SHIFT_4K; io_order = get_iommu_order(size); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT, io_order, NULL); + mask >> IOMMU_PAGE_SHIFT_4K, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> IOMMU_PAGE_SHIFT; + nio_pages = size >> IOMMU_PAGE_SHIFT_4K; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, if (tce_value) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK_4K) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= IOMMU_PAGE_SHIFT_4K; if (ioba < tbl->it_offset) return -EINVAL; @@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) return -EINVAL; - if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~(IOMMU_PAGE_MASK_4K | TCE_PCI_WRITE | TCE_PCI_READ)) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK) + if (ioba & ~IOMMU_PAGE_MASK_4K) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT; + ioba >>= IOMMU_PAGE_SHIFT_4K; if (ioba < tbl->it_offset) return -EINVAL; @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT_4K, hwaddr, ret); */ return ret; @@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, { int ret; struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK_4K & ~PAGE_MASK; enum dma_data_direction direction = iommu_tce_direction(tce); ret = get_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT, ret); */ + tce, entry << IOMMU_PAGE_SHIFT_4K, ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, if (ret < 0) pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); + __func__, entry << IOMMU_PAGE_SHIFT_4K, tce, ret); return ret; } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 76a64821f4a2..2e89fa350763 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -520,14 +520,14 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, struct vio_dev *viodev = to_vio_dev(dev); dma_addr_t ret = DMA_ERROR_CODE; - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K))) { atomic_inc(&viodev->cmo.allocs_failed); return ret; } ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); atomic_inc(&viodev->cmo.allocs_failed); } @@ -543,7 +543,7 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); } static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, @@ -556,7 +556,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, size_t alloc_size = 0; for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE_4K); if (vio_cmo_alloc(viodev, alloc_size)) { atomic_inc(&viodev->cmo.allocs_failed); @@ -572,7 +572,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, } for (sgl = sglist, count = 0; count < ret; count++, sgl++) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -590,7 +590,7 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, int count = 0; for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); @@ -736,7 +736,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) return -EINVAL; } - viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); + viodev->cmo.desired = + IOMMU_PAGE_ALIGN_4K(viodrv->get_desired_dma(viodev)); if (viodev->cmo.desired < VIO_CMO_MIN_ENT) viodev->cmo.desired = VIO_CMO_MIN_ENT; size = VIO_CMO_MIN_ENT; @@ -1176,9 +1177,9 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; tbl->it_busno = 0; tbl->it_type = TCE_VB; tbl->it_blocksize = 16; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index b53560660b72..fc61b908eaf0 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE) + for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE_4K) io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); mb(); @@ -430,7 +430,7 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, { cell_iommu_setup_stab(iommu, base, size, 0, 0); iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, - IOMMU_PAGE_SHIFT); + IOMMU_PAGE_SHIFT_4K); cell_iommu_enable_hardware(iommu); } @@ -487,8 +487,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_blocksize = 16; window->table.it_base = (unsigned long)iommu->ptab; window->table.it_index = iommu->nid; - window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset; - window->table.it_size = size >> IOMMU_PAGE_SHIFT; + window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT_4K) + pte_offset; + window->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; iommu_init_table(&window->table, iommu->nid); @@ -773,7 +773,7 @@ static void __init cell_iommu_init_one(struct device_node *np, /* Setup the iommu_table */ cell_iommu_setup_window(iommu, np, base, size, - offset >> IOMMU_PAGE_SHIFT); + offset >> IOMMU_PAGE_SHIFT_4K); } static void __init cell_disable_iommus(void) @@ -1122,7 +1122,7 @@ static int __init cell_iommu_fixed_mapping_init(void) cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, - IOMMU_PAGE_SHIFT); + IOMMU_PAGE_SHIFT_4K); cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, fbase, fsize); cell_iommu_enable_hardware(iommu); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bac289aac7cc..7f4d857668a9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -564,7 +564,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT; + tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT_4K; tbl->it_index = 0; tbl->it_size = tce_size >> 3; tbl->it_busno = 0; @@ -761,7 +761,7 @@ static struct notifier_block tce_iommu_bus_nb = { static int __init tce_iommu_bus_notifier_init(void) { - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE); + BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE_4K); bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a80af6c20cba..1b7531ce0c0c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -488,7 +488,7 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_busno = phb->bus->number; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT; + tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT_4K; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -499,7 +499,7 @@ static void iommu_table_setparms(struct pci_controller *phb, phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT; + tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT_4K; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -540,8 +540,8 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; - tbl->it_size = size >> IOMMU_PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; } static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c1f190858701..49cd16e6b450 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -72,7 +72,7 @@ int CMO_PrPSP = -1; int CMO_SecPSP = -1; -unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT); +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -569,7 +569,7 @@ void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; - int page_order = IOMMU_PAGE_SHIFT; + int page_order = IOMMU_PAGE_SHIFT_4K; pr_debug(" -> fw_cmo_feature_init()\n"); spin_lock(&rtas_data_buf_lock); diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c index 62cb527493e7..8a589618551e 100644 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ b/arch/powerpc/platforms/wsp/wsp_pci.c @@ -260,7 +260,7 @@ static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", - tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT); + tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K); uaddr += TCE_PAGE_SIZE; index++; @@ -381,8 +381,8 @@ static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, /* Init bits and pieces */ tbl->table.it_blocksize = 16; - tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT; - tbl->table.it_size = size >> IOMMU_PAGE_SHIFT; + tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT_4K; + tbl->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; /* * It's already blank but we clear it anyway. @@ -449,8 +449,8 @@ static void wsp_pci_dma_dev_setup(struct pci_dev *pdev) if (table) { pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n", pci_name(pdev), - table->table.it_offset << IOMMU_PAGE_SHIFT, - (table->table.it_offset << IOMMU_PAGE_SHIFT) + table->table.it_offset << IOMMU_PAGE_SHIFT_4K, + (table->table.it_offset << IOMMU_PAGE_SHIFT_4K) + phb->dma32_region_size - 1); archdata->dma_data.iommu_table_base = &table->table; return; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 952d795230a4..f7d7538b6bd9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1282,24 +1282,25 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) /* netdev inits at probe time along with the structures we need below*/ if (netdev == NULL) - return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT); + return IOMMU_PAGE_ALIGN_4K(IBMVETH_IO_ENTITLEMENT_DEFAULT); adapter = netdev_priv(netdev); ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; - ret += IOMMU_PAGE_ALIGN(netdev->mtu); + ret += IOMMU_PAGE_ALIGN_4K(netdev->mtu); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ if (adapter->rx_buff_pool[i].active) ret += adapter->rx_buff_pool[i].size * - IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + IOMMU_PAGE_ALIGN_4K(adapter->rx_buff_pool[i]. buff_size); rxqentries += adapter->rx_buff_pool[i].size; } /* add the size of the receive queue entries */ - ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry)); + ret += IOMMU_PAGE_ALIGN_4K( + rxqentries * sizeof(struct ibmveth_rx_q_entry)); return ret; } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index bdae7a04af75..a84788ba662c 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -81,7 +81,7 @@ static int tce_iommu_enable(struct tce_container *container) * enforcing the limit based on the max that the guest can map. */ down_write(¤t->mm->mmap_sem); - npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; locked = current->mm->locked_vm + npages; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { @@ -110,7 +110,7 @@ static void tce_iommu_disable(struct tce_container *container) down_write(¤t->mm->mmap_sem); current->mm->locked_vm -= (container->tbl->it_size << - IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; up_write(¤t->mm->mmap_sem); } @@ -174,8 +174,8 @@ static long tce_iommu_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; - info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; + info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K; + info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K; info.flags = 0; if (copy_to_user((void __user *)arg, &info, minsz)) @@ -205,8 +205,8 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; - if ((param.size & ~IOMMU_PAGE_MASK) || - (param.vaddr & ~IOMMU_PAGE_MASK)) + if ((param.size & ~IOMMU_PAGE_MASK_4K) || + (param.vaddr & ~IOMMU_PAGE_MASK_4K)) return -EINVAL; /* iova is checked by the IOMMU API */ @@ -220,17 +220,17 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { + for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { ret = iommu_put_tce_user_mode(tbl, - (param.iova >> IOMMU_PAGE_SHIFT) + i, + (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, tce); if (ret) break; - tce += IOMMU_PAGE_SIZE; + tce += IOMMU_PAGE_SIZE_4K; } if (ret) iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT, i); + param.iova >> IOMMU_PAGE_SHIFT_4K, i); iommu_flush_tce(tbl); @@ -256,17 +256,17 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; - if (param.size & ~IOMMU_PAGE_MASK) + if (param.size & ~IOMMU_PAGE_MASK_4K) return -EINVAL; ret = iommu_tce_clear_param_check(tbl, param.iova, 0, - param.size >> IOMMU_PAGE_SHIFT); + param.size >> IOMMU_PAGE_SHIFT_4K); if (ret) return ret; ret = iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT, - param.size >> IOMMU_PAGE_SHIFT); + param.iova >> IOMMU_PAGE_SHIFT_4K, + param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); return ret; -- cgit v1.2.3 From 3a553170d35d69bea3877bffa508489dfa6f133d Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:02 +1100 Subject: powerpc/iommu: Add it_page_shift field to determine iommu page size This patch adds a it_page_shift field to struct iommu_table and initiliases it to 4K for all platforms. Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/vio.c | 5 +++-- arch/powerpc/platforms/cell/iommu.c | 8 +++++--- arch/powerpc/platforms/pasemi/iommu.c | 5 ++++- arch/powerpc/platforms/powernv/pci.c | 3 ++- arch/powerpc/platforms/pseries/iommu.c | 10 ++++++---- arch/powerpc/platforms/wsp/wsp_pci.c | 5 +++-- 7 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 0869c7e74421..7c928342f3f5 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -76,6 +76,7 @@ struct iommu_table { struct iommu_pool large_pool; struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ + unsigned long it_page_shift;/* table iommu page size */ #ifdef CONFIG_IOMMU_API struct iommu_group *it_group; #endif diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 2e89fa350763..170ac24a0106 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1177,9 +1177,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> tbl->it_page_shift; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = offset >> tbl->it_page_shift; tbl->it_busno = 0; tbl->it_type = TCE_VB; tbl->it_blocksize = 16; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index fc61b908eaf0..2b90ff8a93be 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE_4K) + for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift) io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); mb(); @@ -487,8 +487,10 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_blocksize = 16; window->table.it_base = (unsigned long)iommu->ptab; window->table.it_index = iommu->nid; - window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT_4K) + pte_offset; - window->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; iommu_init_table(&window->table, iommu->nid); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 7d2d036754b5..2e576f2ae442 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -138,8 +138,11 @@ static void iommu_table_iobmap_setup(void) pr_debug(" -> %s\n", __func__); iommu_table_iobmap.it_busno = 0; iommu_table_iobmap.it_offset = 0; + iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT; + /* it_size is in number of entries */ - iommu_table_iobmap.it_size = 0x80000000 >> IOBMAP_PAGE_SHIFT; + iommu_table_iobmap.it_size = + 0x80000000 >> iommu_table_iobmap.it_page_shift; /* Initialize the common IOMMU code */ iommu_table_iobmap.it_base = (unsigned long)iob_l2_base; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 7f4d857668a9..569b46423a93 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -564,7 +564,8 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, { tbl->it_blocksize = 16; tbl->it_base = (unsigned long)tce_mem; - tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT_4K; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = dma_offset >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_size = tce_size >> 3; tbl->it_busno = 0; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1b7531ce0c0c..e0299183ae54 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -486,9 +486,10 @@ static void iommu_table_setparms(struct pci_controller *phb, memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -499,7 +500,7 @@ static void iommu_table_setparms(struct pci_controller *phb, phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -537,11 +538,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); tbl->it_busno = phb->bus->number; + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> IOMMU_PAGE_SHIFT_4K; - tbl->it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->it_offset = offset >> tbl->it_page_shift; + tbl->it_size = size >> tbl->it_page_shift; } static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c index 8a589618551e..9a15e5b39bb8 100644 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ b/arch/powerpc/platforms/wsp/wsp_pci.c @@ -381,8 +381,9 @@ static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, /* Init bits and pieces */ tbl->table.it_blocksize = 16; - tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT_4K; - tbl->table.it_size = size >> IOMMU_PAGE_SHIFT_4K; + tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->table.it_offset = addr >> tbl->table.it_page_shift; + tbl->table.it_size = size >> tbl->table.it_page_shift; /* * It's already blank but we clear it anyway. -- cgit v1.2.3 From d084775738b746648d4102337163a04534a02982 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 9 Dec 2013 18:17:03 +1100 Subject: powerpc/iommu: Update the generic code to use dynamic iommu page sizes This patch updates the generic iommu backend code to use the it_page_shift field to determine the iommu page size instead of using hardcoded values. Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 19 +++++--- arch/powerpc/kernel/dma-iommu.c | 4 +- arch/powerpc/kernel/iommu.c | 88 +++++++++++++++++++----------------- arch/powerpc/kernel/vio.c | 25 +++++++--- arch/powerpc/platforms/powernv/pci.c | 2 - drivers/net/ethernet/ibm/ibmveth.c | 15 +++--- 6 files changed, 88 insertions(+), 65 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7c928342f3f5..f7a8036579b5 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -35,17 +35,14 @@ #define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1)) #define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K) +#define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift) +#define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1)) +#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr)) + /* Boot time flags */ extern int iommu_is_off; extern int iommu_force_on; -/* Pure 2^n version of get_order */ -static __inline__ __attribute_const__ int get_iommu_order(unsigned long size) -{ - return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT_4K) + 1; -} - - /* * IOMAP_MAX_ORDER defines the largest contiguous block * of dma space we can get. IOMAP_MAX_ORDER = 13 @@ -82,6 +79,14 @@ struct iommu_table { #endif }; +/* Pure 2^n version of get_order */ +static inline __attribute_const__ +int get_iommu_order(unsigned long size, struct iommu_table *tbl) +{ + return __ilog2((size - 1) >> tbl->it_page_shift) + 1; +} + + struct scatterlist; static inline void set_iommu_table_base(struct device *dev, void *base) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 5cfe3dbfc4a0..54d0116256f7 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT_4K)) { + if (tbl->it_offset > (mask >> tbl->it_page_shift)) { dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset << IOMMU_PAGE_SHIFT_4K); + mask, tbl->it_offset << tbl->it_page_shift); return 0; } else return 1; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index df4a7f1b7444..f58d8135aab2 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -251,14 +251,13 @@ again: if (dev) boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IOMMU_PAGE_SHIFT_4K); + 1 << tbl->it_page_shift); else - boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT_4K); + boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, - tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT_4K, - align_mask); + n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, + boundary_size >> tbl->it_page_shift, align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ @@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, return DMA_ERROR_CODE; entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << IOMMU_PAGE_SHIFT_4K; /* Set the return dma address */ + ret = entry << tbl->it_page_shift; /* Set the return dma address */ /* Put the TCEs in the HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - (unsigned long)page & IOMMU_PAGE_MASK_4K, - direction, attrs); + (unsigned long)page & + IOMMU_PAGE_MASK(tbl), direction, attrs); /* ppc_md.tce_build() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return @@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, { unsigned long entry, free_entry; - entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; struct iommu_pool *pool; - entry = dma_addr >> IOMMU_PAGE_SHIFT_4K; + entry = dma_addr >> tbl->it_page_shift; free_entry = entry - tbl->it_offset; pool = get_pool(tbl, free_entry); @@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long) sg_virt(s); - npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl)); align = 0; - if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && slen >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE && (vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; + align = PAGE_SHIFT - tbl->it_page_shift; entry = iommu_range_alloc(dev, tbl, npages, &handle, - mask >> IOMMU_PAGE_SHIFT_4K, align); + mask >> tbl->it_page_shift, align); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << IOMMU_PAGE_SHIFT_4K; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK_4K); + dma_addr = entry << tbl->it_page_shift; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ build_fail = ppc_md.tce_build(tbl, entry, npages, - vaddr & IOMMU_PAGE_MASK_4K, - direction, attrs); + vaddr & IOMMU_PAGE_MASK(tbl), + direction, attrs); if(unlikely(build_fail)) goto failure; @@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & IOMMU_PAGE_MASK_4K; + vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl); npages = iommu_num_pages(s->dma_address, s->dma_length, - IOMMU_PAGE_SIZE_4K); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sg->dma_length == 0) break; npages = iommu_num_pages(dma_handle, sg->dma_length, - IOMMU_PAGE_SIZE_4K); + IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, dma_handle, npages); sg = sg_next(sg); } @@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) set_bit(0, tbl->it_map); /* We only split the IOMMU table if we have 1GB or more of space */ - if ((tbl->it_size << IOMMU_PAGE_SHIFT_4K) >= (1UL * 1024 * 1024 * 1024)) + if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024)) tbl->nr_pools = IOMMU_NR_POOLS; else tbl->nr_pools = 1; @@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { align = 0; - if (IOMMU_PAGE_SHIFT_4K < PAGE_SHIFT && size >= PAGE_SIZE && + if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) - align = PAGE_SHIFT - IOMMU_PAGE_SHIFT_4K; + align = PAGE_SHIFT - tbl->it_page_shift; dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, - mask >> IOMMU_PAGE_SHIFT_4K, align, + mask >> tbl->it_page_shift, align, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { @@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, npages); } } else - dma_handle |= (uaddr & ~IOMMU_PAGE_MASK_4K); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl)); } return dma_handle; @@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, BUG_ON(direction == DMA_NONE); if (tbl) { - npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE_4K); + npages = iommu_num_pages(dma_handle, size, + IOMMU_PAGE_SIZE(tbl)); iommu_free(tbl, dma_handle, npages); } } @@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> IOMMU_PAGE_SHIFT_4K; - io_order = get_iommu_order(size); + nio_pages = size >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, - mask >> IOMMU_PAGE_SHIFT_4K, io_order, NULL); + mask >> tbl->it_page_shift, io_order, NULL); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> IOMMU_PAGE_SHIFT_4K; + nio_pages = size >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, if (tce_value) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK_4K) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT_4K; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) return -EINVAL; - if (tce & ~(IOMMU_PAGE_MASK_4K | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) return -EINVAL; - if (ioba & ~IOMMU_PAGE_MASK_4K) + if (ioba & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; - ioba >>= IOMMU_PAGE_SHIFT_4K; + ioba >>= tbl->it_page_shift; if (ioba < tbl->it_offset) return -EINVAL; @@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", - __func__, hwaddr, entry << IOMMU_PAGE_SHIFT_4K, + __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl), hwaddr, ret); */ return ret; @@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, { int ret; struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK_4K & ~PAGE_MASK; + unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; enum dma_data_direction direction = iommu_tce_direction(tce); ret = get_user_pages_fast(tce & PAGE_MASK, 1, direction != DMA_TO_DEVICE, &page); if (unlikely(ret != 1)) { /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << IOMMU_PAGE_SHIFT_4K, ret); */ + tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */ return -EFAULT; } hwaddr = (unsigned long) page_address(page) + offset; @@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, if (ret < 0) pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << IOMMU_PAGE_SHIFT_4K, tce, ret); + __func__, entry << tbl->it_page_shift, tce, ret); return ret; } @@ -1127,6 +1127,12 @@ int iommu_add_device(struct device *dev) pr_debug("iommu_tce: adding %s to iommu group %d\n", dev_name(dev), iommu_group_id(tbl->it_group)); + if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { + pr_err("iommu_tce: unsupported iommu page size."); + pr_err("%s has not been added\n", dev_name(dev)); + return -EINVAL; + } + ret = iommu_group_add_device(tbl->it_group, dev); if (ret < 0) pr_err("iommu_tce: %s has not been added, ret=%d\n", diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 170ac24a0106..826d8bd9e522 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; dma_addr_t ret = DMA_ERROR_CODE; - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K))) { + tbl = get_iommu_table_base(dev); + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { atomic_inc(&viodev->cmo.allocs_failed); return ret; } ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); atomic_inc(&viodev->cmo.allocs_failed); } @@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; + tbl = get_iommu_table_base(dev); dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE_4K)); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); } static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, @@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; int ret, count = 0; size_t alloc_size = 0; + tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE_4K); + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); if (vio_cmo_alloc(viodev, alloc_size)) { atomic_inc(&viodev->cmo.allocs_failed); @@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, } for (sgl = sglist, count = 0; count < ret; count++, sgl++) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, struct dma_attrs *attrs) { struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl; struct scatterlist *sgl; size_t alloc_size = 0; int count = 0; + tbl = get_iommu_table_base(dev); for (sgl = sglist; count < nelems; count++, sgl++) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE_4K); + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); @@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { struct vio_cmo_dev_entry *dev_ent; struct device *dev = &viodev->dev; + struct iommu_table *tbl; struct vio_driver *viodrv = to_vio_driver(dev->driver); unsigned long flags; size_t size; bool dma_capable = false; + tbl = get_iommu_table_base(dev); + /* A device requires entitlement if it has a DMA window property */ switch (viodev->family) { case VDEVICE: @@ -737,7 +748,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) } viodev->cmo.desired = - IOMMU_PAGE_ALIGN_4K(viodrv->get_desired_dma(viodev)); + IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl); if (viodev->cmo.desired < VIO_CMO_MIN_ENT) viodev->cmo.desired = VIO_CMO_MIN_ENT; size = VIO_CMO_MIN_ENT; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 569b46423a93..b555ebc57ef5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -762,8 +762,6 @@ static struct notifier_block tce_iommu_bus_nb = { static int __init tce_iommu_bus_notifier_init(void) { - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE_4K); - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); return 0; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index f7d7538b6bd9..d04dbaba83dd 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1276,31 +1276,34 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) { struct net_device *netdev = dev_get_drvdata(&vdev->dev); struct ibmveth_adapter *adapter; + struct iommu_table *tbl; unsigned long ret; int i; int rxqentries = 1; + tbl = get_iommu_table_base(&vdev->dev); + /* netdev inits at probe time along with the structures we need below*/ if (netdev == NULL) - return IOMMU_PAGE_ALIGN_4K(IBMVETH_IO_ENTITLEMENT_DEFAULT); + return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT, tbl); adapter = netdev_priv(netdev); ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE; - ret += IOMMU_PAGE_ALIGN_4K(netdev->mtu); + ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { /* add the size of the active receive buffers */ if (adapter->rx_buff_pool[i].active) ret += adapter->rx_buff_pool[i].size * - IOMMU_PAGE_ALIGN_4K(adapter->rx_buff_pool[i]. - buff_size); + IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i]. + buff_size, tbl); rxqentries += adapter->rx_buff_pool[i].size; } /* add the size of the receive queue entries */ - ret += IOMMU_PAGE_ALIGN_4K( - rxqentries * sizeof(struct ibmveth_rx_q_entry)); + ret += IOMMU_PAGE_ALIGN( + rxqentries * sizeof(struct ibmveth_rx_q_entry), tbl); return ret; } -- cgit v1.2.3 From 228b1a473037c89d524e03a569c688a22241b4ea Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 8 Aug 2013 15:56:09 +0300 Subject: powerpc/booke64: Add LRAT error exception handler LRAT (Logical to Real Address Translation) present in MMU v2 provides hardware translation from a logical page number (LPN) to a real page number (RPN) when tlbwe is executed by a guest or when a page table translation occurs from a guest virtual address. Add LRAT error exception handler to Booke3E 64-bit kernel and the basic KVM handler to avoid build breakage. This is a prerequisite for KVM LRAT support that will follow. Signed-off-by: Mihai Caraman Signed-off-by: Scott Wood --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/include/asm/reg_booke.h | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12 ++++++++++++ arch/powerpc/kernel/exceptions-64e.S | 17 +++++++++++++++++ arch/powerpc/kvm/bookehv_interrupts.S | 2 ++ 5 files changed, 33 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 1bd92fd43cfb..1503d8c7c41b 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -74,6 +74,7 @@ #define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39 #define BOOKE_INTERRUPT_HV_SYSCALL 40 #define BOOKE_INTERRUPT_HV_PRIV 41 +#define BOOKE_INTERRUPT_LRAT_ERROR 42 /* book3s */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2e31aacd8acc..1f7134dd0946 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -101,6 +101,7 @@ #define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ #define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ #define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ +#define SPRN_IVOR42 0x1B4 /* Interrupt Vector Offset Register 42 */ #define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */ #define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */ #define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */ diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7290b7..fa6862db8a02 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -57,6 +57,12 @@ _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: #endif bl __setup_cpu_e5500 mtlr r6 @@ -119,6 +125,12 @@ _GLOBAL(__setup_cpu_e5500) _GLOBAL(__restore_cpu_e6500) mflr r5 bl .setup_altivec_ivors + /* Touch IVOR42 only if the CPU supports E.HV category */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_lrat_ivor +1: bl __restore_cpu_e5500 mtlr r5 blr diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e7751561fd1d..4d5a0b1034e8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -308,6 +308,7 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x2e0, guest_doorbell_crit) EXCEPTION_STUB(0x300, hypercall) EXCEPTION_STUB(0x320, ehpriv) + EXCEPTION_STUB(0x340, lrat_error) .globl interrupt_end_book3e interrupt_end_book3e: @@ -677,6 +678,17 @@ kernel_dbg_exc: bl .unknown_exception b .ret_from_except +/* LRAT Error interrupt */ + START_EXCEPTION(lrat_error); + NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, + PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x340, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -859,6 +871,7 @@ BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) BAD_STACK_TRAMPOLINE(0x310) BAD_STACK_TRAMPOLINE(0x320) +BAD_STACK_TRAMPOLINE(0x340) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -1414,3 +1427,7 @@ _GLOBAL(setup_ehv_ivors) SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr + +_GLOBAL(setup_lrat_ivor) + SET_IVOR(42, 0x340) /* LRAT Error */ + blr diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d659c55..a0d6929d8678 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -319,6 +319,8 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \ SPRN_DSRR0, SPRN_DSRR1, 0 kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \ SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) #else /* * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h -- cgit v1.2.3 From 640e922501103aaf2e0abb4cf4de5d49fa8342f7 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Tue, 10 Dec 2013 23:07:45 +0000 Subject: powerpc: fix exception clearing in e500 SPE float emulation The e500 SPE floating-point emulation code clears existing exceptions (__FPU_FPSCR &= ~FP_EX_MASK;) before ORing in the exceptions from the emulated operation. However, these exception bits are the "sticky", cumulative exception bits, and should only be cleared by the user program setting SPEFSCR, not implicitly by any floating-point instruction (whether executed purely by the hardware or emulated). The spurious clearing of these bits shows up as missing exceptions in glibc testing. Fixing this, however, is not as simple as just not clearing the bits, because while the bits may be from previous floating-point operations (in which case they should not be cleared), the processor can also set the sticky bits itself before the interrupt for an exception occurs, and this can happen in cases when IEEE 754 semantics are that the sticky bit should not be set. Specifically, the "invalid" sticky bit is set in various cases with non-finite operands, where IEEE 754 semantics do not involve raising such an exception, and the "underflow" sticky bit is set in cases of exact underflow, whereas IEEE 754 semantics are that this flag is set only for inexact underflow. Thus, for correct emulation the kernel needs to know the setting of these two sticky bits before the instruction being emulated. When a floating-point operation raises an exception, the kernel can note the state of the sticky bits immediately afterwards. Some functions that affect the state of these bits, such as fesetenv and feholdexcept, need to use prctl with PR_GET_FPEXC and PR_SET_FPEXC anyway, and so it is natural to record the state of those bits during that call into the kernel and so avoid any need for a separate call into the kernel to inform it of a change to those bits. Thus, the interface I chose to use (in this patch and the glibc port) is that one of those prctl calls must be made after any userspace change to those sticky bits, other than through a floating-point operation that traps into the kernel anyway. feclearexcept and fesetexceptflag duly make those calls, which would not be required were it not for this issue. The previous EGLIBC port, and the uClibc code copied from it, is fundamentally broken as regards any use of prctl for floating-point exceptions because it didn't use the PR_FP_EXC_SW_ENABLE bit in its prctl calls (and did various worse things, such as passing a pointer when prctl expected an integer). If you avoid anything where prctl is used, the clearing of sticky bits still means it will never give anything approximating correct exception semantics with existing kernels. I don't believe the patch makes things any worse for existing code that doesn't try to inform the kernel of changes to sticky bits - such code may get incorrect exceptions in some cases, but it would have done so anyway in other cases. Signed-off-by: Joseph Myers Signed-off-by: Scott Wood --- arch/powerpc/include/asm/processor.h | 6 +++++- arch/powerpc/kernel/process.c | 30 ++++++++++++++++++++++++++++-- arch/powerpc/math-emu/math_efp.c | 20 +++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fc14a38c7ccf..91441d9cbaae 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -256,6 +256,8 @@ struct thread_struct { unsigned long evr[32]; /* upper 32-bits of SPE regs */ u64 acc; /* Accumulator */ unsigned long spefscr; /* SPE & eFP status */ + unsigned long spefscr_last; /* SPEFSCR value on last prctl + call or trap return */ int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -317,7 +319,9 @@ struct thread_struct { (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack) #ifdef CONFIG_SPE -#define SPEFSCR_INIT .spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, +#define SPEFSCR_INIT \ + .spefscr = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, \ + .spefscr_last = SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE, #else #define SPEFSCR_INIT #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb0..b08c0d03530f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1175,6 +1175,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val & PR_FP_EXC_SW_ENABLE) { #ifdef CONFIG_SPE if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); tsk->thread.fpexc_mode = val & (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); return 0; @@ -1206,9 +1219,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) #ifdef CONFIG_SPE - if (cpu_has_feature(CPU_FTR_SPE)) + if (cpu_has_feature(CPU_FTR_SPE)) { + /* + * When the sticky exception bits are set + * directly by userspace, it must call prctl + * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE + * in the existing prctl settings) or + * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in + * the bits being set). functions + * saving and restoring the whole + * floating-point environment need to do so + * anyway to restore the prctl settings from + * the saved environment. + */ + tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR); val = tsk->thread.fpexc_mode; - else + } else return -EINVAL; #else return -EINVAL; diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index a73f0884d358..59835c625dc6 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -630,9 +630,27 @@ update_ccr: regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2)); update_regs: - __FPU_FPSCR &= ~FP_EX_MASK; + /* + * If the "invalid" exception sticky bit was set by the + * processor for non-finite input, but was not set before the + * instruction being emulated, clear it. Likewise for the + * "underflow" bit, which may have been set by the processor + * for exact underflow, not just inexact underflow when the + * flag should be set for IEEE 754 semantics. Other sticky + * exceptions will only be set by the processor when they are + * correct according to IEEE 754 semantics, and we must not + * clear sticky bits that were already set before the emulated + * instruction as they represent the user-visible sticky + * exception status. "inexact" traps to kernel are not + * required for IEEE semantics and are not enabled by default, + * so the "inexact" sticky bit may have been set by a previous + * instruction without the kernel being aware of it. + */ + __FPU_FPSCR + &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last; __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK); mtspr(SPRN_SPEFSCR, __FPU_FPSCR); + current->thread.spefscr_last = __FPU_FPSCR; current->thread.evr[fc] = vc.wp[0]; regs->gpr[fc] = vc.wp[1]; -- cgit v1.2.3 From 71a6fa17e1526d3f26f4711cc55d339f96c49a95 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Tue, 17 Dec 2013 16:16:59 +0800 Subject: powerpc/fsl: add E6500 PVR and SPRN_PWRMGTCR0 define E6500 PVR and SPRN_PWRMGTCR0 will be used in subsequent pw20/altivec idle patches. Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/include/asm/reg_booke.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index fa8388ed94c5..62b114e079cf 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1075,6 +1075,8 @@ #define PVR_8560 0x80200000 #define PVR_VER_E500V1 0x8020 #define PVR_VER_E500V2 0x8021 +#define PVR_VER_E6500 0x8040 + /* * For the 8xx processors, all of them report the same PVR family for * the PowerPC core. The various versions of these processors must be diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 1f7134dd0946..163c3b05a76e 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -171,6 +171,7 @@ #define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */ #define SPRN_DCCR 0x3FA /* Data Cache Cacheability Register */ #define SPRN_ICCR 0x3FB /* Instruction Cache Cacheability Register */ +#define SPRN_PWRMGTCR0 0x3FB /* Power management control register 0 */ #define SPRN_SVR 0x3FF /* System Version Register */ /* @@ -217,6 +218,14 @@ #define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */ #define CCR1_TCS 0x00000080 /* Timer Clock Select */ +/* Bit definitions for PWRMGTCR0. */ +#define PWRMGTCR0_PW20_WAIT (1 << 14) /* PW20 state enable bit */ +#define PWRMGTCR0_PW20_ENT_SHIFT 8 +#define PWRMGTCR0_PW20_ENT 0x3F00 +#define PWRMGTCR0_AV_IDLE_PD_EN (1 << 22) /* Altivec idle enable */ +#define PWRMGTCR0_AV_IDLE_CNT_SHIFT 16 +#define PWRMGTCR0_AV_IDLE_CNT 0x3F0000 + /* Bit definitions for the MCSR. */ #define MCSR_MCS 0x80000000 /* Machine Check Summary */ #define MCSR_IB 0x40000000 /* Instruction PLB Error */ -- cgit v1.2.3 From 1820a8d2163554dcd0272cc095338499db4a4dc3 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 8 Oct 2013 09:32:19 +0530 Subject: kvm/powerpc: rename kvm_hypercall() to epapr_hypercall() kvm_hypercall() have nothing KVM specific, so renamed to epapr_hypercall(). Also this in moved to arch/powerpc/include/asm/epapr_hcalls.h Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 46 +++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_para.h | 23 +++++------------ arch/powerpc/kernel/kvm.c | 41 ++--------------------------- 3 files changed, 54 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 86b0ac79990c..eec87f9494bc 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -460,5 +460,51 @@ static inline unsigned int ev_idle(void) return r3; } + +#ifdef CONFIG_EPAPR_PARAVIRT +static inline unsigned long epapr_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl epapr_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +#else +static unsigned long epapr_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return EV_UNIMPLEMENTED; +} +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _EPAPR_HCALLS_H */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2b119654b4c1..c18660ef26d8 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -39,10 +39,6 @@ static inline int kvm_para_available(void) return 1; } -extern unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr); - #else static inline int kvm_para_available(void) @@ -50,13 +46,6 @@ static inline int kvm_para_available(void) return 0; } -static unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr) -{ - return EV_UNIMPLEMENTED; -} - #endif static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) @@ -65,7 +54,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) unsigned long out[8]; unsigned long r; - r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + r = epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); *r2 = out[0]; return r; @@ -76,7 +65,7 @@ static inline long kvm_hypercall0(unsigned int nr) unsigned long in[8]; unsigned long out[8]; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) @@ -85,7 +74,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) unsigned long out[8]; in[0] = p1; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, @@ -96,7 +85,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, @@ -108,7 +97,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; in[2] = p3; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, @@ -122,7 +111,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, in[1] = p2; in[2] = p3; in[3] = p4; - return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); + return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index db28032e320e..6a0175297b0d 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data) { u32 *features = data; - ulong in[8]; + ulong in[8] = {0}; ulong out[8]; in[0] = KVM_MAGIC_PAGE; in[1] = KVM_MAGIC_PAGE; - kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); + epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } @@ -711,43 +711,6 @@ static void kvm_use_magic_page(void) kvm_patching_worked ? "worked" : "failed"); } -unsigned long kvm_hypercall(unsigned long *in, - unsigned long *out, - unsigned long nr) -{ - unsigned long register r0 asm("r0"); - unsigned long register r3 asm("r3") = in[0]; - unsigned long register r4 asm("r4") = in[1]; - unsigned long register r5 asm("r5") = in[2]; - unsigned long register r6 asm("r6") = in[3]; - unsigned long register r7 asm("r7") = in[4]; - unsigned long register r8 asm("r8") = in[5]; - unsigned long register r9 asm("r9") = in[6]; - unsigned long register r10 asm("r10") = in[7]; - unsigned long register r11 asm("r11") = nr; - unsigned long register r12 asm("r12"); - - asm volatile("bl epapr_hypercall_start" - : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), - "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), - "=r"(r12) - : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), - "r"(r9), "r"(r10), "r"(r11) - : "memory", "cc", "xer", "ctr", "lr"); - - out[0] = r4; - out[1] = r5; - out[2] = r6; - out[3] = r7; - out[4] = r8; - out[5] = r9; - out[6] = r10; - out[7] = r11; - - return r3; -} -EXPORT_SYMBOL_GPL(kvm_hypercall); - static __init void kvm_free_tmp(void) { free_reserved_area(&kvm_tmp[kvm_tmp_index], -- cgit v1.2.3 From b1f0d94c26b64e814243b736f47e7ef40d96432c Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 8 Oct 2013 09:32:20 +0530 Subject: kvm/powerpc: move kvm_hypercall0() and friends to epapr_hypercall0() kvm_hypercall0() and friends have nothing KVM specific so moved to epapr_hypercall0() and friends. Also they are moved from arch/powerpc/include/asm/kvm_para.h to arch/powerpc/include/asm/epapr_hcalls.h Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 65 +++++++++++++++++++++++++++++++ arch/powerpc/include/asm/kvm_para.h | 69 +-------------------------------- 2 files changed, 66 insertions(+), 68 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index eec87f9494bc..334459ad145b 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -506,5 +506,70 @@ static unsigned long epapr_hypercall(unsigned long *in, } #endif +static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2) +{ + unsigned long in[8]; + unsigned long out[8]; + unsigned long r; + + r = epapr_hypercall(in, out, nr); + *r2 = out[0]; + + return r; +} + +static inline long epapr_hypercall0(unsigned int nr) +{ + unsigned long in[8]; + unsigned long out[8]; + + return epapr_hypercall(in, out, nr); +} + +static inline long epapr_hypercall1(unsigned int nr, unsigned long p1) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + return epapr_hypercall(in, out, nr); +} + +static inline long epapr_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + return epapr_hypercall(in, out, nr); +} + +static inline long epapr_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + return epapr_hypercall(in, out, nr); +} + +static inline long epapr_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + in[3] = p4; + return epapr_hypercall(in, out, nr); +} #endif /* !__ASSEMBLY__ */ #endif /* _EPAPR_HCALLS_H */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index c18660ef26d8..336a91acb8b1 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -48,73 +48,6 @@ static inline int kvm_para_available(void) #endif -static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) -{ - unsigned long in[8]; - unsigned long out[8]; - unsigned long r; - - r = epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); - *r2 = out[0]; - - return r; -} - -static inline long kvm_hypercall0(unsigned int nr) -{ - unsigned long in[8]; - unsigned long out[8]; - - return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); -} - -static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) -{ - unsigned long in[8]; - unsigned long out[8]; - - in[0] = p1; - return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); -} - -static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, - unsigned long p2) -{ - unsigned long in[8]; - unsigned long out[8]; - - in[0] = p1; - in[1] = p2; - return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); -} - -static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, - unsigned long p2, unsigned long p3) -{ - unsigned long in[8]; - unsigned long out[8]; - - in[0] = p1; - in[1] = p2; - in[2] = p3; - return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); -} - -static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, - unsigned long p2, unsigned long p3, - unsigned long p4) -{ - unsigned long in[8]; - unsigned long out[8]; - - in[0] = p1; - in[1] = p2; - in[2] = p3; - in[3] = p4; - return epapr_hypercall(in, out, KVM_HCALL_TOKEN(nr)); -} - - static inline unsigned int kvm_arch_para_features(void) { unsigned long r; @@ -122,7 +55,7 @@ static inline unsigned int kvm_arch_para_features(void) if (!kvm_para_available()) return 0; - if(kvm_hypercall0_1(KVM_HC_FEATURES, &r)) + if(epapr_hypercall0_1(KVM_HCALL_TOKEN(KVM_HC_FEATURES), &r)) return 0; return r; -- cgit v1.2.3 From 09548fdaf32ce77a68e7f9a8a3098c1306b04858 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Oct 2013 20:43:01 +1100 Subject: KVM: PPC: Use load_fp/vr_state rather than load_up_fpu/altivec The load_up_fpu and load_up_altivec functions were never intended to be called from C, and do things like modifying the MSR value in their callers' stack frames, which are assumed to be interrupt frames. In addition, on 32-bit Book S they require the MMU to be off. This makes KVM use the new load_fp_state() and load_vr_state() functions instead of load_up_fpu/altivec. This means we can remove the assembler glue in book3s_rmhandlers.S, and potentially fixes a bug on Book E, where load_up_fpu was called directly from C. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 3 --- arch/powerpc/include/asm/switch_to.h | 2 -- arch/powerpc/kvm/book3s_exports.c | 4 --- arch/powerpc/kvm/book3s_pr.c | 18 +++++++++----- arch/powerpc/kvm/book3s_rmhandlers.S | 47 ----------------------------------- arch/powerpc/kvm/booke.h | 3 ++- 6 files changed, 14 insertions(+), 63 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4a594b76674d..8bb870694616 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -186,9 +186,6 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); -extern void kvmppc_load_up_fpu(void); -extern void kvmppc_load_up_altivec(void); -extern void kvmppc_load_up_vsx(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9ee12610af02..971ca336f451 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -25,10 +25,8 @@ static inline void save_tar(struct thread_struct *prev) static inline void save_tar(struct thread_struct *prev) {} #endif -extern void load_up_fpu(void); extern void enable_kernel_fp(void); extern void enable_kernel_altivec(void); -extern void load_up_altivec(struct task_struct *); extern int emulate_altivec(struct pt_regs *); extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index 852989a9bad3..20d4ea8e656d 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -25,9 +25,5 @@ EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline); #endif #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline); -EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); -#ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); -#endif #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 21bf7c5c9545..d63a91f825d3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -691,7 +691,8 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif t->fp_state.fpscr = vcpu->arch.fpscr; t->fpexc_mode = 0; - kvmppc_load_up_fpu(); + enable_kernel_fp(); + load_fp_state(&t->fp_state); } if (msr & MSR_VEC) { @@ -699,7 +700,8 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); t->vr_state.vscr = vcpu->arch.vscr; t->vrsave = -1; - kvmppc_load_up_altivec(); + enable_kernel_altivec(); + load_vr_state(&t->vr_state); #endif } @@ -722,11 +724,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) if (!lost_ext) return; - if (lost_ext & MSR_FP) - kvmppc_load_up_fpu(); + if (lost_ext & MSR_FP) { + enable_kernel_fp(); + load_fp_state(¤t->thread.fp_state); + } #ifdef CONFIG_ALTIVEC - if (lost_ext & MSR_VEC) - kvmppc_load_up_altivec(); + if (lost_ext & MSR_VEC) { + enable_kernel_altivec(); + load_vr_state(¤t->thread.vr_state); + } #endif current->thread.regs->msr |= lost_ext; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index a38c4c9edab8..c78ffbc371a5 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -166,51 +166,4 @@ _GLOBAL(kvmppc_entry_trampoline) mtsrr1 r6 RFI -#if defined(CONFIG_PPC_BOOK3S_32) -#define STACK_LR INT_FRAME_SIZE+4 - -/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ -#define MSR_EXT_START \ - PPC_STL r20, _NIP(r1); \ - mfmsr r20; \ - LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ - andc r3,r20,r3; /* Disable DR,EE */ \ - mtmsr r3; \ - sync - -#define MSR_EXT_END \ - mtmsr r20; /* Enable DR,EE */ \ - sync; \ - PPC_LL r20, _NIP(r1) - -#elif defined(CONFIG_PPC_BOOK3S_64) -#define STACK_LR _LINK -#define MSR_EXT_START -#define MSR_EXT_END -#endif - -/* - * Activate current's external feature (FPU/Altivec/VSX) - */ -#define define_load_up(what) \ - \ -_GLOBAL(kvmppc_load_up_ ## what); \ - PPC_STLU r1, -INT_FRAME_SIZE(r1); \ - mflr r3; \ - PPC_STL r3, STACK_LR(r1); \ - MSR_EXT_START; \ - \ - bl FUNC(load_up_ ## what); \ - \ - MSR_EXT_END; \ - PPC_LL r3, STACK_LR(r1); \ - mtlr r3; \ - addi r1, r1, INT_FRAME_SIZE; \ - blr - -define_load_up(fpu) -#ifdef CONFIG_ALTIVEC -define_load_up(altivec) -#endif - #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 09bfd9bc7cf8..fe59f225327f 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -136,7 +136,8 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PPC_FPU if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { - load_up_fpu(); + enable_kernel_fp(); + load_fp_state(¤t->thread.fp_state); current->thread.regs->msr |= MSR_FP; } #endif -- cgit v1.2.3 From efff19122315f1431f6b02cd2983b15f5d3957bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Oct 2013 20:43:02 +1100 Subject: KVM: PPC: Store FP/VSX/VMX state in thread_fp/vr_state structures This uses struct thread_fp_state and struct thread_vr_state to store the floating-point, VMX/Altivec and VSX state, rather than flat arrays. This makes transferring the state to/from the thread_struct simpler and allows us to unify the get/set_one_reg implementations for the VSX registers. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 12 +-- arch/powerpc/kernel/asm-offsets.c | 11 +- arch/powerpc/kvm/book3s.c | 38 +++++-- arch/powerpc/kvm/book3s_hv.c | 42 -------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +- arch/powerpc/kvm/book3s_paired_singles.c | 169 +++++++++++++++---------------- arch/powerpc/kvm/book3s_pr.c | 63 +----------- arch/powerpc/kvm/booke.c | 8 +- arch/powerpc/kvm/powerpc.c | 4 +- 9 files changed, 131 insertions(+), 220 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237d1d25b448..3ca0b430eaee 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -410,8 +410,7 @@ struct kvm_vcpu_arch { ulong gpr[32]; - u64 fpr[32]; - u64 fpscr; + struct thread_fp_state fp; #ifdef CONFIG_SPE ulong evr[32]; @@ -420,12 +419,7 @@ struct kvm_vcpu_arch { u64 acc; #endif #ifdef CONFIG_ALTIVEC - vector128 vr[32]; - vector128 vscr; -#endif - -#ifdef CONFIG_VSX - u64 vsr[64]; + struct thread_vr_state vr; #endif #ifdef CONFIG_KVM_BOOKE_HV @@ -619,6 +613,8 @@ struct kvm_vcpu_arch { #endif }; +#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] + /* Values for vcpu->arch.state */ #define KVMPPC_VCPU_NOTREADY 0 #define KVMPPC_VCPU_RUNNABLE 1 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2ea5cc033ec8..8403f9031d93 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -425,14 +425,11 @@ int main(void) DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); - DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); - DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); + DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fp.fpscr)); #ifdef CONFIG_ALTIVEC - DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); - DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); -#endif -#ifdef CONFIG_VSX - DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); + DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vr.vscr)); #endif DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 48cf91bc862f..94e597e6f15c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -577,10 +577,10 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, vcpu->arch.fpr[i]); + val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fpscr); + val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -588,19 +588,30 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: val = get_reg_val(reg->id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = reg->id - KVM_REG_PPC_VSR0; + val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ case KVM_REG_PPC_DEBUG_INST: { u32 opcode = INS_TW; r = copy_to_user((u32 __user *)(long)reg->addr, @@ -656,10 +667,10 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: i = reg->id - KVM_REG_PPC_FPR0; - vcpu->arch.fpr[i] = set_reg_val(reg->id, val); + VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fpscr = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -667,14 +678,14 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); break; case KVM_REG_PPC_VRSAVE: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { @@ -684,6 +695,17 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vrsave = set_reg_val(reg->id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = reg->id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 088a6e54c998..461f55566167 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -811,27 +811,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_SDAR: *val = get_reg_val(id, vcpu->arch.sdar); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - /* VSX => FP reg i is stored in arch.vsr[2*i] */ - long int i = id - KVM_REG_PPC_FPR0; - *val = get_reg_val(id, vcpu->arch.vsr[2 * i]); - } else { - /* let generic code handle it */ - r = -EINVAL; - } - break; - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = id - KVM_REG_PPC_VSR0; - val->vsxval[0] = vcpu->arch.vsr[2 * i]; - val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; - } else { - r = -ENXIO; - } - break; -#endif /* CONFIG_VSX */ case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -914,27 +893,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_SDAR: vcpu->arch.sdar = set_reg_val(id, *val); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - /* VSX => FP reg i is stored in arch.vsr[2*i] */ - long int i = id - KVM_REG_PPC_FPR0; - vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); - } else { - /* let generic code handle it */ - r = -EINVAL; - } - break; - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: - if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = id - KVM_REG_PPC_VSR0; - vcpu->arch.vsr[2 * i] = val->vsxval[0]; - vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; - } else { - r = -ENXIO; - } - break; -#endif /* CONFIG_VSX */ case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d5ddc2d10748..47fd536fb13b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1889,7 +1889,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) BEGIN_FTR_SECTION reg = 0 .rept 32 - li r6,reg*16+VCPU_VSRS + li r6,reg*16+VCPU_FPRS STXVD2X(reg,R6,R3) reg = reg + 1 .endr @@ -1951,7 +1951,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) BEGIN_FTR_SECTION reg = 0 .rept 32 - li r7,reg*16+VCPU_VSRS + li r7,reg*16+VCPU_FPRS LXVD2X(reg,R7,R4) reg = reg + 1 .endr diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a59a25a13218..c1abd95063f4 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -160,7 +160,7 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); + kvm_cvt_df(&VCPU_FPR(vcpu, rt), &vcpu->arch.qpr[rt]); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) @@ -207,11 +207,11 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); + kvm_cvt_fd((u32*)tmp, &VCPU_FPR(vcpu, rs)); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: - vcpu->arch.fpr[rs] = *((u64*)tmp); + VCPU_FPR(vcpu, rs) = *((u64*)tmp); break; } @@ -233,18 +233,18 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); + kvm_cvt_df(&VCPU_FPR(vcpu, rs), (u32*)tmp); val = *((u32*)tmp); len = sizeof(u32); break; case FPU_LS_SINGLE_LOW: - *((u32*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs] & 0xffffffff; + *((u32*)tmp) = VCPU_FPR(vcpu, rs); + val = VCPU_FPR(vcpu, rs) & 0xffffffff; len = sizeof(u32); break; case FPU_LS_DOUBLE: - *((u64*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs]; + *((u64*)tmp) = VCPU_FPR(vcpu, rs); + val = VCPU_FPR(vcpu, rs); len = sizeof(u64); break; default: @@ -301,7 +301,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); + kvm_cvt_fd(&tmp[0], &VCPU_FPR(vcpu, rs)); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -319,7 +319,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 tmp[2]; int len = w ? sizeof(u32) : sizeof(u64); - kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); + kvm_cvt_df(&VCPU_FPR(vcpu, rs), &tmp[0]); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -512,7 +512,6 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, u32 *src2, u32 *src3)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out; u32 ps0_in1, ps0_in2, ps0_in3; u32 ps1_in1, ps1_in2, ps1_in3; @@ -521,20 +520,20 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1); - kvm_cvt_df(&fpr[reg_in2], &ps0_in2); - kvm_cvt_df(&fpr[reg_in3], &ps0_in3); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in3), &ps0_in3); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -545,7 +544,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps1_in2 = ps0_in2; if (!(scalar & SCALAR_NO_PS1)) - func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); + func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); @@ -561,7 +560,6 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, u32 *src2)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out; u32 ps0_in1, ps0_in2; u32 ps1_out; @@ -571,20 +569,20 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in1), &ps0_in1); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in2), &ps0_in2); - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in1, &ps0_in2); if (!(scalar & SCALAR_NO_PS0)) { dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); } /* PS1 */ @@ -594,7 +592,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, if (scalar & SCALAR_HIGH) ps1_in2 = ps0_in2; - func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); + func(&vcpu->arch.fp.fpscr, &ps1_out, &ps1_in1, &ps1_in2); if (!(scalar & SCALAR_NO_PS1)) { qpr[reg_out] = ps1_out; @@ -612,7 +610,6 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, u32 *dst, u32 *src1)) { u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; u32 ps0_out, ps0_in; u32 ps1_in; @@ -620,17 +617,17 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in], &ps0_in); - func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); + kvm_cvt_df(&VCPU_FPR(vcpu, reg_in), &ps0_in); + func(&vcpu->arch.fp.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out]); + kvm_cvt_fd(&ps0_out, &VCPU_FPR(vcpu, reg_out)); /* PS1 */ ps1_in = qpr[reg_in]; - func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); + func(&vcpu->arch.fp.fpscr, &qpr[reg_out], &ps1_in); dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", ps1_in, qpr[reg_out]); @@ -649,10 +646,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) int ax_rc = inst_get_field(inst, 21, 25); short full_d = inst_get_field(inst, 16, 31); - u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; - u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; - u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; - u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; + u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); + u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); + u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); + u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); bool rcomp = (inst & 1) ? true : false; u32 cr = kvmppc_get_cr(vcpu); @@ -674,11 +671,11 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Do we need to clear FE0 / FE1 here? Don't think so. */ #ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f); + kvm_cvt_df(&VCPU_FPR(vcpu, i), &f); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", - i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); + i, f, VCPU_FPR(vcpu, i), i, vcpu->arch.qpr[i]); } #endif @@ -764,8 +761,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; } case OP_4X_PS_NEG: - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) ^= 0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] ^= 0x80000000; break; @@ -775,7 +772,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_4X_PS_MR: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; case OP_4X_PS_CMPO1: @@ -784,44 +781,44 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_4X_PS_NABS: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) |= 0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] |= 0x80000000; break; case OP_4X_PS_ABS: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rb); + VCPU_FPR(vcpu, ax_rd) &= ~0x8000000000000000ULL; vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; vcpu->arch.qpr[ax_rd] &= ~0x80000000; break; case OP_4X_PS_MERGE00: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra); + /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ + kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb), &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_ra); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; case OP_4X_PS_MERGE10: WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd]); - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - kvm_cvt_df(&vcpu->arch.fpr[ax_rb], + &VCPU_FPR(vcpu, ax_rd)); + /* vcpu->arch.qpr[ax_rd] = VCPU_FPR(vcpu, ax_rb); */ + kvm_cvt_df(&VCPU_FPR(vcpu, ax_rb), &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + /* VCPU_FPR(vcpu, ax_rd) = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd]); + &VCPU_FPR(vcpu, ax_rd)); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -856,7 +853,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_4A_PS_SUM1: emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; + VCPU_FPR(vcpu, ax_rd) = VCPU_FPR(vcpu, ax_rc); break; case OP_4A_PS_SUM0: emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, @@ -1106,45 +1103,45 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) case 59: switch (inst_get_field(inst, 21, 30)) { case OP_59_FADDS: - fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FSUBS: - fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FDIVS: - fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fdivs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FRES: - fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fres(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FRSQRTES: - fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_frsqrtes(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; } switch (inst_get_field(inst, 26, 30)) { case OP_59_FMULS: - fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + fpd_fmuls(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FMSUBS: - fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FMADDS: - fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FNMSUBS: - fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmsubs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_59_FNMADDS: - fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmadds(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; } @@ -1159,12 +1156,12 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_63_MFFS: /* XXX missing CR */ - *fpr_d = vcpu->arch.fpscr; + *fpr_d = vcpu->arch.fp.fpscr; break; case OP_63_MTFSF: /* XXX missing fm bits */ /* XXX missing CR */ - vcpu->arch.fpscr = *fpr_b; + vcpu->arch.fp.fpscr = *fpr_b; break; case OP_63_FCMPU: { @@ -1172,7 +1169,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 cr0_mask = 0xf0000000; u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + fpd_fcmpu(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b); cr &= ~(cr0_mask >> cr_shift); cr |= (cr & cr0_mask) >> cr_shift; break; @@ -1183,40 +1180,40 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 cr0_mask = 0xf0000000; u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + fpd_fcmpo(&vcpu->arch.fp.fpscr, &tmp_cr, fpr_a, fpr_b); cr &= ~(cr0_mask >> cr_shift); cr |= (cr & cr0_mask) >> cr_shift; break; } case OP_63_FNEG: - fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fneg(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FMR: *fpr_d = *fpr_b; break; case OP_63_FABS: - fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fabs(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FCPSGN: - fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fcpsgn(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FDIV: - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FADD: - fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FSUB: - fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + fpd_fsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_b); break; case OP_63_FCTIW: - fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fctiw(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FCTIWZ: - fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fctiwz(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); break; case OP_63_FRSP: - fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_frsp(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); kvmppc_sync_qpr(vcpu, ax_rd); break; case OP_63_FRSQRTE: @@ -1224,39 +1221,39 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) double one = 1.0f; /* fD = sqrt(fB) */ - fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + fpd_fsqrt(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_b); /* fD = 1.0f / fD */ - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); + fpd_fdiv(&vcpu->arch.fp.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); break; } } switch (inst_get_field(inst, 26, 30)) { case OP_63_FMUL: - fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + fpd_fmul(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c); break; case OP_63_FSEL: - fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fsel(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FMSUB: - fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FMADD: - fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FNMSUB: - fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmsub(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; case OP_63_FNMADD: - fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + fpd_fnmadd(&vcpu->arch.fp.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); break; } break; } #ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fp.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f); + kvm_cvt_df(&VCPU_FPR(vcpu, i), &f); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d63a91f825d3..2bb425b22461 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -545,12 +545,6 @@ static inline int get_fpr_index(int i) void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = &t->fp_state.fpr[0][0]; - int i; /* * VSX instructions can access FP and vector registers, so if @@ -575,24 +569,14 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) */ if (current->thread.regs->msr & MSR_FP) giveup_fpu(current); - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; - - vcpu->arch.fpscr = t->fp_state.fpscr; - -#ifdef CONFIG_VSX - if (cpu_has_feature(CPU_FTR_VSX)) - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) - vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; -#endif + vcpu->arch.fp = t->fp_state; } #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { if (current->thread.regs->msr & MSR_VEC) giveup_altivec(current); - memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr)); - vcpu->arch.vscr = t->vr_state.vscr; + vcpu->arch.vr = t->vr_state; } #endif @@ -640,12 +624,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) { struct thread_struct *t = ¤t->thread; - u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX - u64 *vcpu_vsx = vcpu->arch.vsr; -#endif - u64 *thread_fpr = &t->fp_state.fpr[0][0]; - int i; /* When we have paired singles, we emulate in software */ if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) @@ -683,13 +661,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #endif if (msr & MSR_FP) { - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) - thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; -#ifdef CONFIG_VSX - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) - thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; -#endif - t->fp_state.fpscr = vcpu->arch.fpscr; + t->fp_state = vcpu->arch.fp; t->fpexc_mode = 0; enable_kernel_fp(); load_fp_state(&t->fp_state); @@ -697,8 +669,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, if (msr & MSR_VEC) { #ifdef CONFIG_ALTIVEC - memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); - t->vr_state.vscr = vcpu->arch.vscr; + t->vr_state = vcpu->arch.vr; t->vrsave = -1; enable_kernel_altivec(); load_vr_state(&t->vr_state); @@ -1118,19 +1089,6 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { - long int i = id - KVM_REG_PPC_VSR0; - - if (!cpu_has_feature(CPU_FTR_VSX)) { - r = -ENXIO; - break; - } - val->vsxval[0] = vcpu->arch.fpr[i]; - val->vsxval[1] = vcpu->arch.vsr[i]; - break; - } -#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -1149,19 +1107,6 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; -#ifdef CONFIG_VSX - case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { - long int i = id - KVM_REG_PPC_VSR0; - - if (!cpu_has_feature(CPU_FTR_VSX)) { - r = -ENXIO; - break; - } - vcpu->arch.fpr[i] = val->vsxval[0]; - vcpu->arch.vsr[i] = val->vsxval[1]; - break; - } -#endif /* CONFIG_VSX */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 53e65a210b9a..0033465ecc3f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -707,9 +707,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) fpexc_mode = current->thread.fpexc_mode; /* Restore guest FPU state to thread */ - memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr, - sizeof(vcpu->arch.fpr)); - current->thread.fp_state.fpscr = vcpu->arch.fpscr; + current->thread.fp_state = vcpu->arch.fp; /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest @@ -745,9 +743,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->fpu_active = 0; /* Save guest FPU state from thread */ - memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr, - sizeof(vcpu->arch.fpr)); - vcpu->arch.fpscr = current->thread.fp_state.fpscr; + vcpu->arch.fp = current->thread.fp_state; /* Restore userspace FPU state from stack */ current->thread.fp_state = fp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9ae97686e9f4..7ca9e0a80499 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -656,14 +656,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); break; case KVM_MMIO_REG_FPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; break; #ifdef CONFIG_PPC_BOOK3S case KVM_MMIO_REG_QPR: vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; case KVM_MMIO_REG_FQPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; + VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr; vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #endif -- cgit v1.2.3 From f5e3fe091f5238459752a81b478398b7cb22e575 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 15 Nov 2013 11:01:15 +0530 Subject: kvm: powerpc: define a linux pte lookup function We need to search linux "pte" to get "pte" attributes for setting TLB in KVM. This patch defines a lookup_linux_ptep() function which returns pte pointer. Signed-off-by: Bharat Bhushan Reviewed-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/pgtable.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 7d6eacf249cf..b60ceb8d86c8 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -223,6 +223,27 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, #endif pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift); + +static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva, + unsigned long *pte_sizep) +{ + pte_t *ptep; + unsigned long ps = *pte_sizep; + unsigned int shift; + + ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); + if (!ptep) + return NULL; + if (shift) + *pte_sizep = 1ul << shift; + else + *pte_sizep = PAGE_SIZE; + + if (ps > *pte_sizep) + return NULL; + + return ptep; +} #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 08c9a188d0d0fc0f0c5e17d89a06bb59c493110f Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 18 Nov 2013 11:18:54 +0530 Subject: kvm: powerpc: use caching attributes as per linux pte KVM uses same WIM tlb attributes as the corresponding qemu pte. For this we now search the linux pte for the requested page and get these cache caching/coherency attributes from pte. Signed-off-by: Bharat Bhushan Reviewed-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/booke.c | 1 + arch/powerpc/kvm/e500.h | 8 ++++--- arch/powerpc/kvm/e500_mmu_host.c | 43 ++++++++++++++++++++++--------------- 4 files changed, 33 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3ca0b430eaee..2c2ca5faf7f2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -540,6 +540,7 @@ struct kvm_vcpu_arch { #endif gpa_t paddr_accessed; gva_t vaddr_accessed; + pgd_t *pgdir; u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; @@ -597,7 +598,6 @@ struct kvm_vcpu_arch { struct list_head run_list; struct task_struct *run_task; struct kvm_run *kvm_run; - pgd_t *pgdir; spinlock_t vpa_update_lock; struct kvmppc_vpa vpa; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a983ccaf3cce..54ee1c01798f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -717,6 +717,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) thread.debug = current->thread.debug; current->thread.debug = vcpu->arch.shadow_dbg_reg; + vcpu->arch.pgdir = current->mm->pgd; kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 4fd9650eb018..a326178bdea5 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -31,11 +31,13 @@ enum vcpu_ftr { #define E500_TLB_NUM 2 /* entry is mapped somewhere in host TLB */ -#define E500_TLB_VALID (1 << 0) +#define E500_TLB_VALID (1 << 31) /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ -#define E500_TLB_BITMAP (1 << 1) +#define E500_TLB_BITMAP (1 << 30) /* TLB1 entry is mapped by host TLB0 */ -#define E500_TLB_TLB0 (1 << 2) +#define E500_TLB_TLB0 (1 << 29) +/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */ +#define E500_TLB_MAS2_ATTR (0x7f) struct tlbe_ref { pfn_t pfn; /* valid only for TLB0, except briefly */ diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index a45e25cd78fc..dd2cc03f406f 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -65,15 +65,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) return mas3; } -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) -{ -#ifdef CONFIG_SMP - return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; -#else - return mas2 & MAS2_ATTRIB_MASK; -#endif -} - /* * writing shadow tlb entry to host TLB */ @@ -249,11 +240,14 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, - pfn_t pfn) + pfn_t pfn, unsigned int wimg) { ref->pfn = pfn; ref->flags = E500_TLB_VALID; + /* Use guest supplied MAS2_G and MAS2_E */ + ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg; + /* Mark the page accessed */ kvm_set_pfn_accessed(pfn); @@ -316,8 +310,7 @@ static void kvmppc_e500_setup_stlbe( /* Force IPROT=0 for all guest mappings. */ stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) | - e500_shadow_mas2_attrib(gtlbe->mas2, pr); + stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); @@ -339,6 +332,10 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, int ret = 0; unsigned long mmu_seq; struct kvm *kvm = vcpu_e500->vcpu.kvm; + unsigned long tsize_pages = 0; + pte_t *ptep; + unsigned int wimg = 0; + pgd_t *pgdir; /* used to check for invalidations in progress */ mmu_seq = kvm->mmu_notifier_seq; @@ -405,7 +402,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, */ for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end, tsize_pages; + unsigned long gfn_start, gfn_end; tsize_pages = 1 << (tsize - 2); gfn_start = gfn & ~(tsize_pages - 1); @@ -447,11 +444,12 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } if (likely(!pfnmap)) { - unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_noslot_pfn(pfn)) { - printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", - (long)gfn); + if (printk_ratelimit()) + pr_err("%s: real page not found for gfn %lx\n", + __func__, (long)gfn); return -EINVAL; } @@ -466,7 +464,18 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, goto out; } - kvmppc_e500_ref_setup(ref, gtlbe, pfn); + + pgdir = vcpu_e500->vcpu.arch.pgdir; + ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages); + if (pte_present(*ptep)) + wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; + else { + if (printk_ratelimit()) + pr_err("%s: pte not present: gfn %lx, pfn %lx\n", + __func__, (long)gfn, pfn); + return -EINVAL; + } + kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, ref, gvaddr, stlbe); -- cgit v1.2.3 From 7a8ff56be68239bd36a2b639cb40bfbcfc58dad3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 9 Jan 2014 11:10:44 +0100 Subject: KVM: PPC: Unify kvmppc_get_last_inst and sc We had code duplication between the inline functions to get our last instruction on normal interrupts and system call interrupts. Unify both helper functions towards a single implementation. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8bb870694616..f8b23201c105 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -264,10 +264,8 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.pc; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) +static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) { - ulong pc = kvmppc_get_pc(vcpu); - /* Load the instruction manually if it failed to do so in the * exit path */ if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) @@ -276,6 +274,11 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) return vcpu->arch.last_inst; } +static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) +{ + return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); +} + /* * Like kvmppc_get_last_inst(), but for fetching a sc instruction. * Because the sc instruction sets SRR0 to point to the following @@ -283,14 +286,7 @@ static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) */ static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) { - ulong pc = kvmppc_get_pc(vcpu) - 4; - - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return vcpu->arch.last_inst; + return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); } static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 1c49abec677c7ff495837dbaafad8e12f09c29fc Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 24 Dec 2013 15:12:05 +0800 Subject: powerpc: introduce macro LOAD_REG_ADDR_PIC This is used to get the address of a variable when the kernel is not running at the linked or relocated address. Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/include/asm/ppc_asm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f595b98079ee..1279c59624ed 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -295,6 +295,11 @@ n: * you want to access various offsets within it). On ppc32 this is * identical to LOAD_REG_IMMEDIATE. * + * LOAD_REG_ADDR_PIC(rn, name) + * Loads the address of label 'name' into register 'run'. Use this when + * the kernel doesn't run at the linked or relocated address. Please + * note that this macro will clobber the lr register. + * * LOAD_REG_ADDRBASE(rn, name) * ADDROFF(name) * LOAD_REG_ADDRBASE loads part of the address of label 'name' into @@ -305,6 +310,14 @@ n: * LOAD_REG_ADDRBASE(rX, name) * ld rY,ADDROFF(name)(rX) */ + +/* Be careful, this will clobber the lr register. */ +#define LOAD_REG_ADDR_PIC(reg, name) \ + bl 0f; \ +0: mflr reg; \ + addis reg,reg,(name - 0b)@ha; \ + addi reg,reg,(name - 0b)@l; + #ifdef __powerpc64__ #define LOAD_REG_IMMEDIATE(reg,expr) \ lis reg,(expr)@highest; \ -- cgit v1.2.3 From 28efc35fe68dacbddc4b12c2fa8f2df1593a4ad3 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 11 Oct 2013 19:22:38 -0500 Subject: powerpc/e6500: TLB miss handler with hardware tablewalk support There are a few things that make the existing hw tablewalk handlers unsuitable for e6500: - Indirect entries go in TLB1 (though the resulting direct entries go in TLB0). - It has threads, but no "tlbsrx." -- so we need a spinlock and a normal "tlbsx". Because we need this lock, hardware tablewalk is mandatory on e6500 unless we want to add spinlock+tlbsx to the normal bolted TLB miss handler. - TLB1 has no HES (nor next-victim hint) so we need software round robin (TODO: integrate this round robin data with hugetlb/KVM) - The existing tablewalk handlers map half of a page table at a time, because IBM hardware has a fixed 1MiB indirect page size. e6500 has variable size indirect entries, with a minimum of 2MiB. So we can't do the half-page indirect mapping, and even if we could it would be less efficient than mapping the full page. - Like on e5500, the linear mapping is bolted, so we don't need the overhead of supporting nested tlb misses. Note that hardware tablewalk does not work in rev1 of e6500. We do not expect to support e6500 rev1 in mainline Linux. Signed-off-by: Scott Wood Cc: Mihai Caraman --- arch/powerpc/include/asm/mmu-book3e.h | 13 +++ arch/powerpc/include/asm/mmu.h | 21 +++-- arch/powerpc/include/asm/paca.h | 6 ++ arch/powerpc/kernel/asm-offsets.c | 9 ++ arch/powerpc/kernel/paca.c | 5 + arch/powerpc/kernel/setup_64.c | 31 ++++++ arch/powerpc/mm/fsl_booke_mmu.c | 7 ++ arch/powerpc/mm/mem.c | 6 ++ arch/powerpc/mm/tlb_low_64e.S | 171 ++++++++++++++++++++++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 93 ++++++++++++------ 10 files changed, 326 insertions(+), 36 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 936db360790a..89b785d16846 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -286,8 +286,21 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) extern int mmu_linear_psize; extern int mmu_vmemmap_psize; +struct tlb_core_data { + /* For software way selection, as on Freescale TLB1 */ + u8 esel_next, esel_max, esel_first; + + /* Per-core spinlock for e6500 TLB handlers (no tlbsrx.) */ + u8 lock; +}; + #ifdef CONFIG_PPC64 extern unsigned long linear_map_top; +extern int book3e_htw_mode; + +#define PPC_HTW_NONE 0 +#define PPC_HTW_IBM 1 +#define PPC_HTW_E6500 2 /* * 64-bit booke platforms don't load the tlb in the tlb miss handler code. diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 691fd8aca939..f8d1d6dcf7db 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -180,16 +180,17 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 #define MMU_PAGE_1M 5 -#define MMU_PAGE_4M 6 -#define MMU_PAGE_8M 7 -#define MMU_PAGE_16M 8 -#define MMU_PAGE_64M 9 -#define MMU_PAGE_256M 10 -#define MMU_PAGE_1G 11 -#define MMU_PAGE_16G 12 -#define MMU_PAGE_64G 13 - -#define MMU_PAGE_COUNT 14 +#define MMU_PAGE_2M 6 +#define MMU_PAGE_4M 7 +#define MMU_PAGE_8M 8 +#define MMU_PAGE_16M 9 +#define MMU_PAGE_64M 10 +#define MMU_PAGE_256M 11 +#define MMU_PAGE_1G 12 +#define MMU_PAGE_16G 13 +#define MMU_PAGE_64G 14 + +#define MMU_PAGE_COUNT 15 #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c3523d1dda58..e81731c62a7f 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -113,6 +113,10 @@ struct paca_struct { /* Keep pgd in the same cacheline as the start of extlb */ pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */ pgd_t *kernel_pgd; /* Kernel PGD */ + + /* Shared by all threads of a core -- points to tcd of first thread */ + struct tlb_core_data *tcd_ptr; + /* We can have up to 3 levels of reentrancy in the TLB miss handler */ u64 extlb[3][EX_TLB_SIZE / sizeof(u64)]; u64 exmc[8]; /* used for machine checks */ @@ -123,6 +127,8 @@ struct paca_struct { void *mc_kstack; void *crit_kstack; void *dbg_kstack; + + struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ mm_context_t context; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 41a283956a29..ed8d68ce71f3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -203,6 +203,15 @@ int main(void) DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); + DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); + + DEFINE(TCD_ESEL_NEXT, + offsetof(struct tlb_core_data, esel_next)); + DEFINE(TCD_ESEL_MAX, + offsetof(struct tlb_core_data, esel_max)); + DEFINE(TCD_ESEL_FIRST, + offsetof(struct tlb_core_data, esel_first)); + DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock)); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 623c356fe34f..bf0aada02fe4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -160,6 +160,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = init_slb_shadow(cpu); #endif /* CONFIG_PPC_STD_MMU_64 */ + +#ifdef CONFIG_PPC_BOOK3E + /* For now -- if we have threads this will be adjusted later */ + new_paca->tcd_ptr = &new_paca->tcd; +#endif } /* Put the paca pointer into r13 and SPRG_PACA */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2232aff66059..1ce9b87d7df8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -97,6 +97,36 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void setup_tlb_core_data(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + int first = cpu_first_thread_sibling(cpu); + + paca[cpu].tcd_ptr = &paca[first].tcd; + + /* + * If we have threads, we need either tlbsrx. + * or e6500 tablewalk mode, or else TLB handlers + * will be racy and could produce duplicate entries. + */ + if (smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500) { + /* Should we panic instead? */ + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", + __func__); + } + } +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif + #ifdef CONFIG_SMP static char *smt_enabled_cmdline; @@ -445,6 +475,7 @@ void __init setup_system(void) smp_setup_cpu_maps(); check_smt_enabled(); + setup_tlb_core_data(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index a68671c18ad4..94cd728166d3 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -191,6 +192,12 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt, } tlbcam_index = i; +#ifdef CONFIG_PPC64 + get_paca()->tcd.esel_next = i; + get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + get_paca()->tcd.esel_first = i; +#endif + return amount_mapped; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 3fa93dc7fe75..94448cd4b444 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -307,6 +307,12 @@ static void __init register_page_bootmem_info(void) void __init mem_init(void) { + /* + * book3s is limited to 16 page sizes due to encoding this in + * a 4-bit field for slices. + */ + BUILD_BUG_ON(MMU_PAGE_COUNT > 16); + #ifdef CONFIG_SWIOTLB swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index b4113bf86353..75f5d2777f61 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -239,6 +239,177 @@ itlb_miss_fault_bolted: beq tlb_miss_common_bolted b itlb_miss_kernel_bolted +/* + * TLB miss handling for e6500 and derivatives, using hardware tablewalk. + * + * Linear mapping is bolted: no virtual page table or nested TLB misses + * Indirect entries in TLB1, hardware loads resulting direct entries + * into TLB0 + * No HES or NV hint on TLB1, so we need to do software round-robin + * No tlbsrx. so we need a spinlock, and we have to deal + * with MAS-damage caused by tlbsx + * 4K pages only + */ + + START_EXCEPTION(instruction_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + ori r16,r16,1 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user/kernel test */ + + b tlb_miss_common_e6500 + + START_EXCEPTION(data_tlb_miss_e6500) + tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR + + ld r11,PACA_TCD_PTR(r13) + srdi. r15,r16,60 /* get region */ + rldicr r16,r16,0,62 + + TLB_MISS_STATS_SAVE_INFO_BOLTED + bne tlb_miss_kernel_e6500 /* user vs kernel check */ + +/* + * This is the guts of the TLB miss handler for e6500 and derivatives. + * We are entered with: + * + * r16 = page of faulting address (low bit 0 if data, 1 if instruction) + * r15 = crap (free to use) + * r14 = page table base + * r13 = PACA + * r11 = tlb_per_core ptr + * r10 = crap (free to use) + */ +tlb_miss_common_e6500: + /* + * Search if we already have an indirect entry for that virtual + * address, and if we do, bail out. + * + * MAS6:IND should be already set based on MAS4 + */ + addi r10,r11,TCD_LOCK +1: lbarx r15,0,r10 + cmpdi r15,0 + bne 2f + li r15,1 + stbcx. r15,0,r10 + bne 1b + .subsection 1 +2: lbz r15,0(r10) + cmpdi r15,0 + bne 2b + b 1b + .previous + + mfspr r15,SPRN_MAS2 + + tlbsx 0,r16 + mfspr r10,SPRN_MAS1 + andis. r10,r10,MAS1_VALID@h + bne tlb_miss_done_e6500 + + /* Undo MAS-damage from the tlbsx */ + mfspr r10,SPRN_MAS1 + oris r10,r10,MAS1_VALID@h + mtspr SPRN_MAS1,r10 + mtspr SPRN_MAS2,r15 + + /* Now, we need to walk the page tables. First check if we are in + * range. + */ + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 + bne- tlb_miss_fault_e6500 + + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3 + cmpldi cr0,r14,0 + clrrdi r15,r15,3 + beq- tlb_miss_fault_e6500 /* No PGDIR, bail */ + ldx r14,r14,r15 /* grab pgd entry */ + + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + ldx r14,r14,r15 /* grab pud entry */ + + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 + clrrdi r15,r15,3 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 + ldx r14,r14,r15 /* Grab pmd entry */ + + mfspr r10,SPRN_MAS0 + cmpdi cr0,r14,0 + bge tlb_miss_fault_e6500 + + /* Now we build the MAS for a 2M indirect page: + * + * MAS 0 : ESEL needs to be filled by software round-robin + * MAS 1 : Fully set up + * - PID already updated by caller if necessary + * - TSIZE for now is base ind page size always + * - TID already cleared if necessary + * MAS 2 : Default not 2M-aligned, need to be redone + * MAS 3+7 : Needs to be done + */ + + ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) + mtspr SPRN_MAS7_MAS3,r14 + + clrrdi r15,r16,21 /* make EA 2M-aligned */ + mtspr SPRN_MAS2,r15 + + lbz r15,TCD_ESEL_NEXT(r11) + lbz r16,TCD_ESEL_MAX(r11) + lbz r14,TCD_ESEL_FIRST(r11) + rlwimi r10,r15,16,0x00ff0000 /* insert esel_next into MAS0 */ + addi r15,r15,1 /* increment esel_next */ + mtspr SPRN_MAS0,r10 + cmpw r15,r16 + iseleq r15,r14,r15 /* if next == last use first */ + stb r15,TCD_ESEL_NEXT(r11) + + tlbwe + +tlb_miss_done_e6500: + .macro tlb_unlock_e6500 + li r15,0 + isync + stb r15,TCD_LOCK(r11) + .endm + + tlb_unlock_e6500 + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) + tlb_epilog_bolted + rfi + +tlb_miss_kernel_e6500: + mfspr r10,SPRN_MAS1 + ld r14,PACA_KERNELPGD(r13) + cmpldi cr0,r15,8 /* Check for vmalloc region */ + rlwinm r10,r10,0,16,1 /* Clear TID */ + mtspr SPRN_MAS1,r10 + beq+ tlb_miss_common_e6500 + +tlb_miss_fault_e6500: + tlb_unlock_e6500 + /* We need to check if it was an instruction miss */ + andi. r16,r16,1 + bne itlb_miss_fault_e6500 +dtlb_miss_fault_e6500: + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_data_storage_book3e +itlb_miss_fault_e6500: + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) + tlb_epilog_bolted + b exc_instruction_storage_book3e + + /********************************************************************** * * * TLB miss handling for Book3E with TLB reservation and HES support * diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 8805b7b87dc6..735839b74dc5 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -58,6 +59,10 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .shift = 12, .enc = BOOK3E_PAGESZ_4K, }, + [MMU_PAGE_2M] = { + .shift = 21, + .enc = BOOK3E_PAGESZ_2M, + }, [MMU_PAGE_4M] = { .shift = 22, .enc = BOOK3E_PAGESZ_4M, @@ -136,7 +141,7 @@ static inline int mmu_get_tsize(int psize) int mmu_linear_psize; /* Page size used for the linear mapping */ int mmu_pte_psize; /* Page size used for PTE pages */ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ -int book3e_htw_enabled; /* Is HW tablewalk enabled ? */ +int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ unsigned long linear_map_top; /* Top of linear mapping */ #endif /* CONFIG_PPC64 */ @@ -377,7 +382,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) { int tsize = mmu_psize_defs[mmu_pte_psize].enc; - if (book3e_htw_enabled) { + if (book3e_htw_mode != PPC_HTW_NONE) { unsigned long start = address & PMD_MASK; unsigned long end = address + PMD_SIZE; unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; @@ -430,7 +435,7 @@ static void setup_page_sizes(void) def = &mmu_psize_defs[psize]; shift = def->shift; - if (shift == 0) + if (shift == 0 || shift & 1) continue; /* adjust to be in terms of 4^shift Kb */ @@ -440,21 +445,40 @@ static void setup_page_sizes(void) def->flags |= MMU_PAGE_SIZE_DIRECT; } - goto no_indirect; + goto out; } if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { - u32 tlb1ps = mfspr(SPRN_TLB1PS); + u32 tlb1cfg, tlb1ps; + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb1cfg = mfspr(SPRN_TLB1CFG); + tlb1ps = mfspr(SPRN_TLB1PS); + eptcfg = mfspr(SPRN_EPTCFG); + + if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) + book3e_htw_mode = PPC_HTW_E6500; + + /* + * We expect 4K subpage size and unrestricted indirect size. + * The lack of a restriction on indirect size is a Freescale + * extension, indicated by PSn = 0 but SPSn != 0. + */ + if (eptcfg != 2) + book3e_htw_mode = PPC_HTW_NONE; for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; + + if (book3e_htw_mode && psize == MMU_PAGE_2M) + def->flags |= MMU_PAGE_SIZE_INDIRECT; } } - goto no_indirect; + goto out; } #endif @@ -471,8 +495,11 @@ static void setup_page_sizes(void) } /* Indirect page sizes supported ? */ - if ((tlb0cfg & TLBnCFG_IND) == 0) - goto no_indirect; + if ((tlb0cfg & TLBnCFG_IND) == 0 || + (tlb0cfg & TLBnCFG_PT) == 0) + goto out; + + book3e_htw_mode = PPC_HTW_IBM; /* Now, we only deal with one IND page size for each * direct size. Hopefully all implementations today are @@ -497,8 +524,8 @@ static void setup_page_sizes(void) def->ind = ps + 10; } } - no_indirect: +out: /* Cleanup array and print summary */ pr_info("MMU: Supported page sizes\n"); for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { @@ -520,23 +547,23 @@ static void setup_page_sizes(void) static void setup_mmu_htw(void) { - /* Check if HW tablewalk is present, and if yes, enable it by: - * - * - patching the TLB miss handlers to branch to the - * one dedicates to it - * - * - setting the global book3e_htw_enabled - */ - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); + /* + * If we want to use HW tablewalk, enable it by patching the TLB miss + * handlers to branch to the one dedicated to it. + */ - if ((tlb0cfg & TLBnCFG_IND) && - (tlb0cfg & TLBnCFG_PT)) { + switch (book3e_htw_mode) { + case PPC_HTW_IBM: patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); - book3e_htw_enabled = 1; + break; + case PPC_HTW_E6500: + patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); + break; } pr_info("MMU: Book3E HW tablewalk %s\n", - book3e_htw_enabled ? "enabled" : "not supported"); + book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported"); } /* @@ -576,8 +603,16 @@ static void __early_init_mmu(int boot_cpu) /* Set MAS4 based on page table setting */ mas4 = 0x4 << MAS4_WIMGED_SHIFT; - if (book3e_htw_enabled) { - mas4 |= mas4 | MAS4_INDD; + switch (book3e_htw_mode) { + case PPC_HTW_E6500: + mas4 |= MAS4_INDD; + mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; + mas4 |= MAS4_TLBSELD(1); + mmu_pte_psize = MMU_PAGE_2M; + break; + + case PPC_HTW_IBM: + mas4 |= MAS4_INDD; #ifdef CONFIG_PPC_64K_PAGES mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_256M; @@ -585,13 +620,16 @@ static void __early_init_mmu(int boot_cpu) mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_1M; #endif - } else { + break; + + case PPC_HTW_NONE: #ifdef CONFIG_PPC_64K_PAGES mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; #else mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; #endif mmu_pte_psize = mmu_virtual_psize; + break; } mtspr(SPRN_MAS4, mas4); @@ -611,8 +649,11 @@ static void __early_init_mmu(int boot_cpu) /* limit memory so we dont have linear faults */ memblock_enforce_memory_limit(linear_map_top); - patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); + if (book3e_htw_mode == PPC_HTW_NONE) { + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); + patch_exception(0x1e0, + exc_instruction_tlb_miss_bolted_book3e); + } } #endif -- cgit v1.2.3 From a655f724df2c0e1634477c7e89da81477a691c0f Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Tue, 7 Jan 2014 14:27:41 +0800 Subject: powerpc/85xx: handle the eLBC error interrupt if it exists in dts On P1020, P1021, P1022, and P1023, eLBC event interrupts are routed to internal interrupt 3 while ELBC error interrupts are routed to internal interrupt 0. We need to call request_irq for each. Signed-off-by: Shaohui Xie Signed-off-by: Wang Dongsheng Signed-off-by: Kumar Gala [scottwood@freescale.com: reworded commit message and fixed author] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/fsl_lbc.h | 2 +- arch/powerpc/sysdev/fsl_lbc.c | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/fsl_lbc.h b/arch/powerpc/include/asm/fsl_lbc.h index 420b45368fcf..067fb0dca549 100644 --- a/arch/powerpc/include/asm/fsl_lbc.h +++ b/arch/powerpc/include/asm/fsl_lbc.h @@ -285,7 +285,7 @@ struct fsl_lbc_ctrl { /* device info */ struct device *dev; struct fsl_lbc_regs __iomem *regs; - int irq; + int irq[2]; wait_queue_head_t irq_wait; spinlock_t lock; void *nand; diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index 6bc5a546d49f..d631022ffb4b 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -214,10 +214,14 @@ static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data) struct fsl_lbc_ctrl *ctrl = data; struct fsl_lbc_regs __iomem *lbc = ctrl->regs; u32 status; + unsigned long flags; + spin_lock_irqsave(&fsl_lbc_lock, flags); status = in_be32(&lbc->ltesr); - if (!status) + if (!status) { + spin_unlock_irqrestore(&fsl_lbc_lock, flags); return IRQ_NONE; + } out_be32(&lbc->ltesr, LTESR_CLEAR); out_be32(&lbc->lteatr, 0); @@ -260,6 +264,7 @@ static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data) if (status & ~LTESR_MASK) dev_err(ctrl->dev, "Unknown error: " "LTESR 0x%08X\n", status); + spin_unlock_irqrestore(&fsl_lbc_lock, flags); return IRQ_HANDLED; } @@ -298,8 +303,8 @@ static int fsl_lbc_ctrl_probe(struct platform_device *dev) goto err; } - fsl_lbc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0); - if (fsl_lbc_ctrl_dev->irq == NO_IRQ) { + fsl_lbc_ctrl_dev->irq[0] = irq_of_parse_and_map(dev->dev.of_node, 0); + if (!fsl_lbc_ctrl_dev->irq[0]) { dev_err(&dev->dev, "failed to get irq resource\n"); ret = -ENODEV; goto err; @@ -311,20 +316,34 @@ static int fsl_lbc_ctrl_probe(struct platform_device *dev) if (ret < 0) goto err; - ret = request_irq(fsl_lbc_ctrl_dev->irq, fsl_lbc_ctrl_irq, 0, + ret = request_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_irq, 0, "fsl-lbc", fsl_lbc_ctrl_dev); if (ret != 0) { dev_err(&dev->dev, "failed to install irq (%d)\n", - fsl_lbc_ctrl_dev->irq); - ret = fsl_lbc_ctrl_dev->irq; + fsl_lbc_ctrl_dev->irq[0]); + ret = fsl_lbc_ctrl_dev->irq[0]; goto err; } + fsl_lbc_ctrl_dev->irq[1] = irq_of_parse_and_map(dev->dev.of_node, 1); + if (fsl_lbc_ctrl_dev->irq[1]) { + ret = request_irq(fsl_lbc_ctrl_dev->irq[1], fsl_lbc_ctrl_irq, + IRQF_SHARED, "fsl-lbc-err", fsl_lbc_ctrl_dev); + if (ret) { + dev_err(&dev->dev, "failed to install irq (%d)\n", + fsl_lbc_ctrl_dev->irq[1]); + ret = fsl_lbc_ctrl_dev->irq[1]; + goto err1; + } + } + /* Enable interrupts for any detected events */ out_be32(&fsl_lbc_ctrl_dev->regs->lteir, LTEIR_ENABLE); return 0; +err1: + free_irq(fsl_lbc_ctrl_dev->irq[0], fsl_lbc_ctrl_dev); err: iounmap(fsl_lbc_ctrl_dev->regs); kfree(fsl_lbc_ctrl_dev); -- cgit v1.2.3 From 7d71d5b2e80623242b0c0823ce6cb635d089c8b2 Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Sat, 30 Nov 2013 23:51:27 +0100 Subject: clk: mpc5xxx: switch to COMMON_CLK, retire PPC_CLOCK the setup before the change was - arch/powerpc/Kconfig had the PPC_CLOCK option, off by default - depending on the PPC_CLOCK option the arch/powerpc/kernel/clock.c file was built, which implements the clk.h API but always returns -ENOSYS unless a platform registers specific callbacks - the MPC52xx platform selected PPC_CLOCK but did not register any callbacks, thus all clk.h API calls keep resulting in -ENOSYS errors (which is OK, all peripheral drivers deal with the situation) - the MPC512x platform selected PPC_CLOCK and registered specific callbacks implemented in arch/powerpc/platforms/512x/clock.c, thus provided real support for the clock API - no other powerpc platform did select PPC_CLOCK the situation after the change is - the MPC512x platform implements the COMMON_CLK interface, and thus the PPC_CLOCK approach in arch/powerpc/platforms/512x/clock.c has become obsolete - the MPC52xx platform still lacks genuine support for the clk.h API while this is not a change against the previous situation (the error code returned from COMMON_CLK stubs differs but every call still results in an error) - with all references gone, the arch/powerpc/kernel/clock.c wrapper and the PPC_CLOCK option have become obsolete, as did the clk_interface.h header file the switch from PPC_CLOCK to COMMON_CLK is done for all platforms within the same commit such that multiplatform kernels (the combination of 512x and 52xx within one executable) keep working Cc: Mike Turquette Cc: Anatolij Gustschin Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Gerhard Sittig Signed-off-by: Anatolij Gustschin --- arch/powerpc/Kconfig | 5 - arch/powerpc/include/asm/clk_interface.h | 20 - arch/powerpc/kernel/Makefile | 1 - arch/powerpc/kernel/clock.c | 82 ---- arch/powerpc/platforms/512x/Kconfig | 2 +- arch/powerpc/platforms/512x/Makefile | 1 - arch/powerpc/platforms/512x/clock.c | 754 ------------------------------- arch/powerpc/platforms/52xx/Kconfig | 2 +- 8 files changed, 2 insertions(+), 865 deletions(-) delete mode 100644 arch/powerpc/include/asm/clk_interface.h delete mode 100644 arch/powerpc/kernel/clock.c delete mode 100644 arch/powerpc/platforms/512x/clock.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f0a893142cee..b131b26ca45a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1040,11 +1040,6 @@ config KEYS_COMPAT source "crypto/Kconfig" -config PPC_CLOCK - bool - default n - select HAVE_CLK - config PPC_LIB_RHEAP bool diff --git a/arch/powerpc/include/asm/clk_interface.h b/arch/powerpc/include/asm/clk_interface.h deleted file mode 100644 index ab1882c1e176..000000000000 --- a/arch/powerpc/include/asm/clk_interface.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __ASM_POWERPC_CLK_INTERFACE_H -#define __ASM_POWERPC_CLK_INTERFACE_H - -#include - -struct clk_interface { - struct clk* (*clk_get) (struct device *dev, const char *id); - int (*clk_enable) (struct clk *clk); - void (*clk_disable) (struct clk *clk); - unsigned long (*clk_get_rate) (struct clk *clk); - void (*clk_put) (struct clk *clk); - long (*clk_round_rate) (struct clk *clk, unsigned long rate); - int (*clk_set_rate) (struct clk *clk, unsigned long rate); - int (*clk_set_parent) (struct clk *clk, struct clk *parent); - struct clk* (*clk_get_parent) (struct clk *clk); -}; - -extern struct clk_interface clk_functions; - -#endif /* __ASM_POWERPC_CLK_INTERFACE_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 904d713366ff..fcc9a89a4695 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o -obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c deleted file mode 100644 index a764b47791e8..000000000000 --- a/arch/powerpc/kernel/clock.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Dummy clk implementations for powerpc. - * These need to be overridden in platform code. - */ - -#include -#include -#include -#include -#include - -struct clk_interface clk_functions; - -struct clk *clk_get(struct device *dev, const char *id) -{ - if (clk_functions.clk_get) - return clk_functions.clk_get(dev, id); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - if (clk_functions.clk_put) - clk_functions.clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -int clk_enable(struct clk *clk) -{ - if (clk_functions.clk_enable) - return clk_functions.clk_enable(clk); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ - if (clk_functions.clk_disable) - clk_functions.clk_disable(clk); -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (clk_functions.clk_get_rate) - return clk_functions.clk_get_rate(clk); - return 0; -} -EXPORT_SYMBOL(clk_get_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_round_rate) - return clk_functions.clk_round_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_round_rate); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - if (clk_functions.clk_set_rate) - return clk_functions.clk_set_rate(clk, rate); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_rate); - -struct clk *clk_get_parent(struct clk *clk) -{ - if (clk_functions.clk_get_parent) - return clk_functions.clk_get_parent(clk); - return ERR_PTR(-ENOSYS); -} -EXPORT_SYMBOL(clk_get_parent); - -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - if (clk_functions.clk_set_parent) - return clk_functions.clk_set_parent(clk, parent); - return -ENOSYS; -} -EXPORT_SYMBOL(clk_set_parent); diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index fc9c1cbfcb1d..5aa3f4b5332c 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -1,9 +1,9 @@ config PPC_MPC512x bool "512x-based boards" depends on 6xx + select COMMON_CLK select FSL_SOC select IPIC - select PPC_CLOCK select PPC_PCI_CHOICE select FSL_PCI if PCI select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile index 1e05f9def8a4..01693121a2b1 100644 --- a/arch/powerpc/platforms/512x/Makefile +++ b/arch/powerpc/platforms/512x/Makefile @@ -1,7 +1,6 @@ # # Makefile for the Freescale PowerPC 512x linux kernel. # -obj-$(CONFIG_PPC_CLOCK) += clock.o obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o obj-y += mpc512x_shared.o obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c deleted file mode 100644 index fd8a37653417..000000000000 --- a/arch/powerpc/platforms/512x/clock.c +++ /dev/null @@ -1,754 +0,0 @@ -/* - * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: John Rigby - * - * Implements the clk api defined in include/linux/clk.h - * - * Original based on linux/arch/arm/mach-integrator/clock.c - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "mpc512x.h" - -#undef CLK_DEBUG - -static int clocks_initialized; - -#define CLK_HAS_RATE 0x1 /* has rate in MHz */ -#define CLK_HAS_CTRL 0x2 /* has control reg and bit */ - -struct clk { - struct list_head node; - char name[32]; - int flags; - struct device *dev; - unsigned long rate; - struct module *owner; - void (*calc) (struct clk *); - struct clk *parent; - int reg, bit; /* CLK_HAS_CTRL */ - int div_shift; /* only used by generic_div_clk_calc */ -}; - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -static struct clk *mpc5121_clk_get(struct device *dev, const char *id) -{ - struct clk *p, *clk = ERR_PTR(-ENOENT); - int dev_match; - int id_match; - - if (dev == NULL || id == NULL) - return clk; - - mutex_lock(&clocks_mutex); - list_for_each_entry(p, &clocks, node) { - dev_match = id_match = 0; - - if (dev == p->dev) - dev_match++; - if (strcmp(id, p->name) == 0) - id_match++; - if ((dev_match || id_match) && try_module_get(p->owner)) { - clk = p; - break; - } - } - mutex_unlock(&clocks_mutex); - - return clk; -} - -#ifdef CLK_DEBUG -static void dump_clocks(void) -{ - struct clk *p; - - mutex_lock(&clocks_mutex); - printk(KERN_INFO "CLOCKS:\n"); - list_for_each_entry(p, &clocks, node) { - pr_info(" %s=%ld", p->name, p->rate); - if (p->parent) - pr_cont(" %s=%ld", p->parent->name, - p->parent->rate); - if (p->flags & CLK_HAS_CTRL) - pr_cont(" reg/bit=%d/%d", p->reg, p->bit); - pr_cont("\n"); - } - mutex_unlock(&clocks_mutex); -} -#define DEBUG_CLK_DUMP() dump_clocks() -#else -#define DEBUG_CLK_DUMP() -#endif - - -static void mpc5121_clk_put(struct clk *clk) -{ - module_put(clk->owner); -} - -#define NRPSC 12 - -struct mpc512x_clockctl { - u32 spmr; /* System PLL Mode Reg */ - u32 sccr[2]; /* System Clk Ctrl Reg 1 & 2 */ - u32 scfr1; /* System Clk Freq Reg 1 */ - u32 scfr2; /* System Clk Freq Reg 2 */ - u32 reserved; - u32 bcr; /* Bread Crumb Reg */ - u32 pccr[NRPSC]; /* PSC Clk Ctrl Reg 0-11 */ - u32 spccr; /* SPDIF Clk Ctrl Reg */ - u32 cccr; /* CFM Clk Ctrl Reg */ - u32 dccr; /* DIU Clk Cnfg Reg */ -}; - -static struct mpc512x_clockctl __iomem *clockctl; - -static int mpc5121_clk_enable(struct clk *clk) -{ - unsigned int mask; - - if (clk->flags & CLK_HAS_CTRL) { - mask = in_be32(&clockctl->sccr[clk->reg]); - mask |= 1 << clk->bit; - out_be32(&clockctl->sccr[clk->reg], mask); - } - return 0; -} - -static void mpc5121_clk_disable(struct clk *clk) -{ - unsigned int mask; - - if (clk->flags & CLK_HAS_CTRL) { - mask = in_be32(&clockctl->sccr[clk->reg]); - mask &= ~(1 << clk->bit); - out_be32(&clockctl->sccr[clk->reg], mask); - } -} - -static unsigned long mpc5121_clk_get_rate(struct clk *clk) -{ - if (clk->flags & CLK_HAS_RATE) - return clk->rate; - else - return 0; -} - -static long mpc5121_clk_round_rate(struct clk *clk, unsigned long rate) -{ - return rate; -} - -static int mpc5121_clk_set_rate(struct clk *clk, unsigned long rate) -{ - return 0; -} - -static int clk_register(struct clk *clk) -{ - mutex_lock(&clocks_mutex); - list_add(&clk->node, &clocks); - mutex_unlock(&clocks_mutex); - return 0; -} - -static unsigned long spmf_mult(void) -{ - /* - * Convert spmf to multiplier - */ - static int spmf_to_mult[] = { - 68, 1, 12, 16, - 20, 24, 28, 32, - 36, 40, 44, 48, - 52, 56, 60, 64 - }; - int spmf = (in_be32(&clockctl->spmr) >> 24) & 0xf; - return spmf_to_mult[spmf]; -} - -static unsigned long sysdiv_div_x_2(void) -{ - /* - * Convert sysdiv to divisor x 2 - * Some divisors have fractional parts so - * multiply by 2 then divide by this value - */ - static int sysdiv_to_div_x_2[] = { - 4, 5, 6, 7, - 8, 9, 10, 14, - 12, 16, 18, 22, - 20, 24, 26, 30, - 28, 32, 34, 38, - 36, 40, 42, 46, - 44, 48, 50, 54, - 52, 56, 58, 62, - 60, 64, 66, - }; - int sysdiv = (in_be32(&clockctl->scfr2) >> 26) & 0x3f; - return sysdiv_to_div_x_2[sysdiv]; -} - -static unsigned long ref_to_sys(unsigned long rate) -{ - rate *= spmf_mult(); - rate *= 2; - rate /= sysdiv_div_x_2(); - - return rate; -} - -static unsigned long sys_to_ref(unsigned long rate) -{ - rate *= sysdiv_div_x_2(); - rate /= 2; - rate /= spmf_mult(); - - return rate; -} - -static long ips_to_ref(unsigned long rate) -{ - int ips_div = (in_be32(&clockctl->scfr1) >> 23) & 0x7; - - rate *= ips_div; /* csb_clk = ips_clk * ips_div */ - rate *= 2; /* sys_clk = csb_clk * 2 */ - return sys_to_ref(rate); -} - -static unsigned long devtree_getfreq(char *clockname) -{ - struct device_node *np; - const unsigned int *prop; - unsigned int val = 0; - - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr"); - if (np) { - prop = of_get_property(np, clockname, NULL); - if (prop) - val = *prop; - of_node_put(np); - } - return val; -} - -static void ref_clk_calc(struct clk *clk) -{ - unsigned long rate; - - rate = devtree_getfreq("bus-frequency"); - if (rate == 0) { - printk(KERN_ERR "No bus-frequency in dev tree\n"); - clk->rate = 0; - return; - } - clk->rate = ips_to_ref(rate); -} - -static struct clk ref_clk = { - .name = "ref_clk", - .calc = ref_clk_calc, -}; - - -static void sys_clk_calc(struct clk *clk) -{ - clk->rate = ref_to_sys(ref_clk.rate); -} - -static struct clk sys_clk = { - .name = "sys_clk", - .calc = sys_clk_calc, -}; - -static void diu_clk_calc(struct clk *clk) -{ - int diudiv_x_2 = in_be32(&clockctl->scfr1) & 0xff; - unsigned long rate; - - rate = sys_clk.rate; - - rate *= 2; - rate /= diudiv_x_2; - - clk->rate = rate; -} - -static void viu_clk_calc(struct clk *clk) -{ - unsigned long rate; - - rate = sys_clk.rate; - rate /= 2; - clk->rate = rate; -} - -static void half_clk_calc(struct clk *clk) -{ - clk->rate = clk->parent->rate / 2; -} - -static void generic_div_clk_calc(struct clk *clk) -{ - int div = (in_be32(&clockctl->scfr1) >> clk->div_shift) & 0x7; - - clk->rate = clk->parent->rate / div; -} - -static void unity_clk_calc(struct clk *clk) -{ - clk->rate = clk->parent->rate; -} - -static struct clk csb_clk = { - .name = "csb_clk", - .calc = half_clk_calc, - .parent = &sys_clk, -}; - -static void e300_clk_calc(struct clk *clk) -{ - int spmf = (in_be32(&clockctl->spmr) >> 16) & 0xf; - int ratex2 = clk->parent->rate * spmf; - - clk->rate = ratex2 / 2; -} - -static struct clk e300_clk = { - .name = "e300_clk", - .calc = e300_clk_calc, - .parent = &csb_clk, -}; - -static struct clk ips_clk = { - .name = "ips_clk", - .calc = generic_div_clk_calc, - .parent = &csb_clk, - .div_shift = 23, -}; - -/* - * Clocks controlled by SCCR1 (.reg = 0) - */ -static struct clk lpc_clk = { - .name = "lpc_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 30, - .calc = generic_div_clk_calc, - .parent = &ips_clk, - .div_shift = 11, -}; - -static struct clk nfc_clk = { - .name = "nfc_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 29, - .calc = generic_div_clk_calc, - .parent = &ips_clk, - .div_shift = 8, -}; - -static struct clk pata_clk = { - .name = "pata_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 28, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -/* - * PSC clocks (bits 27 - 16) - * are setup elsewhere - */ - -static struct clk sata_clk = { - .name = "sata_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 14, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk fec_clk = { - .name = "fec_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 13, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk pci_clk = { - .name = "pci_clk", - .flags = CLK_HAS_CTRL, - .reg = 0, - .bit = 11, - .calc = generic_div_clk_calc, - .parent = &csb_clk, - .div_shift = 20, -}; - -/* - * Clocks controlled by SCCR2 (.reg = 1) - */ -static struct clk diu_clk = { - .name = "diu_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 31, - .calc = diu_clk_calc, -}; - -static struct clk viu_clk = { - .name = "viu_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 18, - .calc = viu_clk_calc, -}; - -static struct clk axe_clk = { - .name = "axe_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 30, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk usb1_clk = { - .name = "usb1_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 28, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk usb2_clk = { - .name = "usb2_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 27, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk i2c_clk = { - .name = "i2c_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 26, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk mscan_clk = { - .name = "mscan_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 25, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk sdhc_clk = { - .name = "sdhc_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 24, - .calc = unity_clk_calc, - .parent = &ips_clk, -}; - -static struct clk mbx_bus_clk = { - .name = "mbx_bus_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 22, - .calc = half_clk_calc, - .parent = &csb_clk, -}; - -static struct clk mbx_clk = { - .name = "mbx_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 21, - .calc = unity_clk_calc, - .parent = &csb_clk, -}; - -static struct clk mbx_3d_clk = { - .name = "mbx_3d_clk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 20, - .calc = generic_div_clk_calc, - .parent = &mbx_bus_clk, - .div_shift = 14, -}; - -static void psc_mclk_in_calc(struct clk *clk) -{ - clk->rate = devtree_getfreq("psc_mclk_in"); - if (!clk->rate) - clk->rate = 25000000; -} - -static struct clk psc_mclk_in = { - .name = "psc_mclk_in", - .calc = psc_mclk_in_calc, -}; - -static struct clk spdif_txclk = { - .name = "spdif_txclk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 23, -}; - -static struct clk spdif_rxclk = { - .name = "spdif_rxclk", - .flags = CLK_HAS_CTRL, - .reg = 1, - .bit = 23, -}; - -static void ac97_clk_calc(struct clk *clk) -{ - /* ac97 bit clock is always 24.567 MHz */ - clk->rate = 24567000; -} - -static struct clk ac97_clk = { - .name = "ac97_clk_in", - .calc = ac97_clk_calc, -}; - -static struct clk *rate_clks[] = { - &ref_clk, - &sys_clk, - &diu_clk, - &viu_clk, - &csb_clk, - &e300_clk, - &ips_clk, - &fec_clk, - &sata_clk, - &pata_clk, - &nfc_clk, - &lpc_clk, - &mbx_bus_clk, - &mbx_clk, - &mbx_3d_clk, - &axe_clk, - &usb1_clk, - &usb2_clk, - &i2c_clk, - &mscan_clk, - &sdhc_clk, - &pci_clk, - &psc_mclk_in, - &spdif_txclk, - &spdif_rxclk, - &ac97_clk, - NULL -}; - -static void rate_clk_init(struct clk *clk) -{ - if (clk->calc) { - clk->calc(clk); - clk->flags |= CLK_HAS_RATE; - clk_register(clk); - } else { - printk(KERN_WARNING - "Could not initialize clk %s without a calc routine\n", - clk->name); - } -} - -static void rate_clks_init(void) -{ - struct clk **cpp, *clk; - - cpp = rate_clks; - while ((clk = *cpp++)) - rate_clk_init(clk); -} - -/* - * There are two clk enable registers with 32 enable bits each - * psc clocks and device clocks are all stored in dev_clks - */ -static struct clk dev_clks[2][32]; - -/* - * Given a psc number return the dev_clk - * associated with it - */ -static struct clk *psc_dev_clk(int pscnum) -{ - int reg, bit; - struct clk *clk; - - reg = 0; - bit = 27 - pscnum; - - clk = &dev_clks[reg][bit]; - clk->reg = 0; - clk->bit = bit; - return clk; -} - -/* - * PSC clock rate calculation - */ -static void psc_calc_rate(struct clk *clk, int pscnum, struct device_node *np) -{ - unsigned long mclk_src = sys_clk.rate; - unsigned long mclk_div; - - /* - * Can only change value of mclk divider - * when the divider is disabled. - * - * Zero is not a valid divider so minimum - * divider is 1 - * - * disable/set divider/enable - */ - out_be32(&clockctl->pccr[pscnum], 0); - out_be32(&clockctl->pccr[pscnum], 0x00020000); - out_be32(&clockctl->pccr[pscnum], 0x00030000); - - if (in_be32(&clockctl->pccr[pscnum]) & 0x80) { - clk->rate = spdif_rxclk.rate; - return; - } - - switch ((in_be32(&clockctl->pccr[pscnum]) >> 14) & 0x3) { - case 0: - mclk_src = sys_clk.rate; - break; - case 1: - mclk_src = ref_clk.rate; - break; - case 2: - mclk_src = psc_mclk_in.rate; - break; - case 3: - mclk_src = spdif_txclk.rate; - break; - } - - mclk_div = ((in_be32(&clockctl->pccr[pscnum]) >> 17) & 0x7fff) + 1; - clk->rate = mclk_src / mclk_div; -} - -/* - * Find all psc nodes in device tree and assign a clock - * with name "psc%d_mclk" and dev pointing at the device - * returned from of_find_device_by_node - */ -static void psc_clks_init(void) -{ - struct device_node *np; - struct platform_device *ofdev; - u32 reg; - const char *psc_compat; - - psc_compat = mpc512x_select_psc_compat(); - if (!psc_compat) - return; - - for_each_compatible_node(np, NULL, psc_compat) { - if (!of_property_read_u32(np, "reg", ®)) { - int pscnum = (reg & 0xf00) >> 8; - struct clk *clk = psc_dev_clk(pscnum); - - clk->flags = CLK_HAS_RATE | CLK_HAS_CTRL; - ofdev = of_find_device_by_node(np); - clk->dev = &ofdev->dev; - /* - * AC97 is special rate clock does - * not go through normal path - */ - if (of_device_is_compatible(np, "fsl,mpc5121-psc-ac97")) - clk->rate = ac97_clk.rate; - else - psc_calc_rate(clk, pscnum, np); - sprintf(clk->name, "psc%d_mclk", pscnum); - clk_register(clk); - clk_enable(clk); - } - } -} - -static struct clk_interface mpc5121_clk_functions = { - .clk_get = mpc5121_clk_get, - .clk_enable = mpc5121_clk_enable, - .clk_disable = mpc5121_clk_disable, - .clk_get_rate = mpc5121_clk_get_rate, - .clk_put = mpc5121_clk_put, - .clk_round_rate = mpc5121_clk_round_rate, - .clk_set_rate = mpc5121_clk_set_rate, - .clk_set_parent = NULL, - .clk_get_parent = NULL, -}; - -int __init mpc5121_clk_init(void) -{ - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock"); - if (np) { - clockctl = of_iomap(np, 0); - of_node_put(np); - } - - if (!clockctl) { - printk(KERN_ERR "Could not map clock control registers\n"); - return 0; - } - - rate_clks_init(); - psc_clks_init(); - - /* leave clockctl mapped forever */ - /*iounmap(clockctl); */ - DEBUG_CLK_DUMP(); - clocks_initialized++; - clk_functions = mpc5121_clk_functions; - return 0; -} diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index af54174801f7..b625a2c6f4f2 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -1,7 +1,7 @@ config PPC_MPC52xx bool "52xx-based boards" depends on 6xx - select PPC_CLOCK + select COMMON_CLK select PPC_PCI_CHOICE config PPC_MPC5200_SIMPLE -- cgit v1.2.3 From 319bbe0ef513e33ecf1a3f40099b5b369122afbd Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Tue, 10 Dec 2013 14:11:36 +0100 Subject: powerpc/512x: clk: support MPC5121/5123/5125 SoC variants improve the common clock support code for MPC512x - expand the CCM register set declaration with MPC5125 related registers (which reside in the previously "reserved" area) - tell the MPC5121, MPC5123, and MPC5125 SoC variants apart, and derive the availability of components and their clocks from the detected SoC (MBX, AXE, VIU, SPDIF, PATA, SATA, PCI, second FEC, second SDHC, number of PSC components, type of NAND flash controller, interpretation of the CPMF bitfield, PSC/CAN mux0 stage input clocks, output clocks on SoC pins) - add backwards compatibility (allow operation against a device tree which lacks clock related specs) for MPC5125 FECs, too telling SoC variants apart and adjusting the clock tree's generation occurs at runtime, a common generic binary supports all of the chips the MPC5125 approach to the NFC clock (one register with two counters for the high and low periods of the clock) is not implemented, as there are no users and there is no common implementation which supports this kind of clock -- the new implementation would be unused and could not get verified, so it shall wait until there is demand Signed-off-by: Gerhard Sittig Acked-by: Mike Turquette Signed-off-by: Anatolij Gustschin --- arch/powerpc/include/asm/mpc5121.h | 7 +- arch/powerpc/platforms/512x/clock-commonclk.c | 339 ++++++++++++++++++++++---- include/dt-bindings/clock/mpc512x-clock.h | 9 +- 3 files changed, 309 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h index 887d3d6133e3..4a69cd1d5041 100644 --- a/arch/powerpc/include/asm/mpc5121.h +++ b/arch/powerpc/include/asm/mpc5121.h @@ -37,7 +37,12 @@ struct mpc512x_ccm { u32 cccr; /* CFM Clock Control Register */ u32 dccr; /* DIU Clock Control Register */ u32 mscan_ccr[4]; /* MSCAN Clock Control Registers */ - u8 res[0x98]; /* Reserved */ + u32 out_ccr[4]; /* OUT CLK Configure Registers */ + u32 rsv0[2]; /* Reserved */ + u32 scfr3; /* System Clock Frequency Register 3 */ + u32 rsv1[3]; /* Reserved */ + u32 spll_lock_cnt; /* System PLL Lock Counter */ + u8 res[0x6c]; /* Reserved */ }; /* diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index 16569376b259..6eb614a271fb 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -36,7 +36,8 @@ enum { #define NR_PSCS 12 #define NR_MSCANS 4 #define NR_SPDIFS 1 -#define NR_MCLKS (NR_PSCS + NR_MSCANS + NR_SPDIFS) +#define NR_OUTCLK 4 +#define NR_MCLKS (NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK) /* extend the public set of clocks by adding internal slots for management */ enum { @@ -46,11 +47,11 @@ enum { MPC512x_CLK_DDR, MPC512x_CLK_MEM, MPC512x_CLK_IIM, - MPC512x_CLK_SDHC_2, /* intermediates in div+gate combos or fractional dividers */ MPC512x_CLK_DDR_UG, MPC512x_CLK_SDHC_x4, MPC512x_CLK_SDHC_UG, + MPC512x_CLK_SDHC2_UG, MPC512x_CLK_DIU_x4, MPC512x_CLK_DIU_UG, MPC512x_CLK_MBX_BUS_UG, @@ -76,6 +77,144 @@ static struct clk_onecell_data clk_data; static struct mpc512x_ccm __iomem *clkregs; static DEFINE_SPINLOCK(clklock); +/* SoC variants {{{ */ + +/* + * tell SoC variants apart as they are rather similar yet not identical, + * cache the result in an enum to not repeatedly run the expensive OF test + * + * MPC5123 is an MPC5121 without the MBX graphics accelerator + * + * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF, + * no PATA, no SATA, no PCI, two FECs (of different compatibility name), + * only 10 PSCs (of different compatibility name), two SDHCs, different + * NFC IP block, output clocks, system PLL status query, different CPMF + * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet + * those differences can get folded into this clock provider support + * code and don't warrant a separate highly redundant implementation + */ + +static enum soc_type { + MPC512x_SOC_MPC5121, + MPC512x_SOC_MPC5123, + MPC512x_SOC_MPC5125, +} soc; + +static void mpc512x_clk_determine_soc(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) { + soc = MPC512x_SOC_MPC5121; + return; + } + if (of_machine_is_compatible("fsl,mpc5123")) { + soc = MPC512x_SOC_MPC5123; + return; + } + if (of_machine_is_compatible("fsl,mpc5125")) { + soc = MPC512x_SOC_MPC5125; + return; + } +} + +static bool soc_has_mbx(void) +{ + if (soc == MPC512x_SOC_MPC5121) + return true; + return false; +} + +static bool soc_has_axe(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_viu(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_spdif(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_pata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_sata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_pci(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool soc_has_fec2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static int soc_max_pscnum(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return 10; + return 12; +} + +static bool soc_has_sdhc2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_nfc_5125(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_outclk(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_cpmf_0_bypass(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool soc_has_mclk_mux0_canin(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +/* }}} SoC variants */ /* common clk API wrappers {{{ */ /* convenience wrappers around the common clk API */ @@ -196,12 +335,23 @@ static int get_sys_div_x2(void) */ static int get_cpmf_mult_x2(void) { - static int cpmf_to_mult[] = { + static int cpmf_to_mult_x36[] = { + /* 0b000 is "times 36" */ 72, 2, 2, 3, 4, 5, 6, 7, }; + static int cpmf_to_mult_0by[] = { + /* 0b000 is "bypass" */ + 2, 2, 2, 3, 4, 5, 6, 7, + }; + + int *cpmf_to_mult; int cpmf; cpmf = get_bit_field(&clkregs->spmr, 16, 4); + if (soc_has_cpmf_0_bypass()) + cpmf_to_mult = cpmf_to_mult_0by; + else + cpmf_to_mult = cpmf_to_mult_x36; return cpmf_to_mult[cpmf]; } @@ -347,14 +497,19 @@ static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, * it's the very data type dictated by , * "fixing" this warning will break compilation */ -static const char *parent_names_mux0[] = { +static const char *parent_names_mux0_spdif[] = { "sys", "ref", "psc-mclk-in", "spdif-tx", }; +static const char *parent_names_mux0_canin[] = { + "sys", "ref", "psc-mclk-in", "can-clk-in", +}; + enum mclk_type { MCLK_TYPE_PSC, MCLK_TYPE_MSCAN, MCLK_TYPE_SPDIF, + MCLK_TYPE_OUTCLK, }; struct mclk_setup_data { @@ -394,6 +549,15 @@ struct mclk_setup_data { "spdif_mclk", \ } +#define MCLK_SETUP_DATA_OUTCLK(id) { \ + MCLK_TYPE_OUTCLK, 0, \ + "out" #id "-mux0", \ + "out" #id "-en0", \ + "out" #id "_mclk_div", \ + { "out" #id "_mclk_div", "dummy", }, \ + "out" #id "_clk", \ +} + static struct mclk_setup_data mclk_psc_data[] = { MCLK_SETUP_DATA_PSC(0), MCLK_SETUP_DATA_PSC(1), @@ -420,6 +584,13 @@ static struct mclk_setup_data mclk_spdif_data[] = { MCLK_SETUP_DATA_SPDIF, }; +static struct mclk_setup_data mclk_outclk_data[] = { + MCLK_SETUP_DATA_OUTCLK(0), + MCLK_SETUP_DATA_OUTCLK(1), + MCLK_SETUP_DATA_OUTCLK(2), + MCLK_SETUP_DATA_OUTCLK(3), +}; + /* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) { @@ -447,6 +618,13 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX; mccr_reg = &clkregs->spccr; break; + case MCLK_TYPE_OUTCLK: + clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx) + * MCLK_MAX_IDX; + mccr_reg = &clkregs->out_ccr[idx]; + break; default: return; } @@ -495,7 +673,10 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) */ clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed( entry->name_mux0, - &parent_names_mux0[0], ARRAY_SIZE(parent_names_mux0), + soc_has_mclk_mux0_canin() + ? &parent_names_mux0_canin[0] + : &parent_names_mux0_spdif[0], + ARRAY_SIZE(parent_names_mux0_spdif), mccr_reg, 14, 2); clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated( entry->name_en0, entry->name_mux0, @@ -576,6 +757,12 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0, &clkregs->scfr2, 1, 7, CLK_DIVIDER_ONE_BASED); + if (soc_has_sdhc2()) { + clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider( + "sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2, + 9, 7, CLK_DIVIDER_ONE_BASED); + } + clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1); clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0, &clkregs->scfr1, 0, 8, @@ -592,19 +779,32 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) div = 2; /* compensate for the fractional factor */ clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div); - clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor("mbx-bus-ug", "csb", - 1, 2); - clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable("mbx-ug", "mbx-bus-ug", - &clkregs->scfr1, 14, 3, - divtab_1234); - clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor("mbx-3d-ug", "mbx-ug", - 1, 1); - clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable("pci-ug", "csb", - &clkregs->scfr1, 20, 3, - divtab_2346); - clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable("nfc-ug", "ips", - &clkregs->scfr1, 8, 3, - divtab_1234); + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor( + "mbx-bus-ug", "csb", 1, 2); + clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable( + "mbx-ug", "mbx-bus-ug", &clkregs->scfr1, + 14, 3, divtab_1234); + clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor( + "mbx-3d-ug", "mbx-ug", 1, 1); + } + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable( + "pci-ug", "csb", &clkregs->scfr1, + 20, 3, divtab_2346); + } + if (soc_has_nfc_5125()) { + /* + * XXX TODO implement 5125 NFC clock setup logic, + * with high/low period counters in clkregs->scfr3, + * currently there are no users so it's ENOIMPL + */ + clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP); + } else { + clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable( + "nfc-ug", "ips", &clkregs->scfr1, + 8, 3, divtab_1234); + } clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips", &clkregs->scfr1, 11, 3, divtab_1234); @@ -613,10 +813,12 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) &clkregs->sccr1, 30); clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug", &clkregs->sccr1, 29); - clks[MPC512x_CLK_PATA] = mpc512x_clk_gated("pata", "ips", - &clkregs->sccr1, 28); + if (soc_has_pata()) { + clks[MPC512x_CLK_PATA] = mpc512x_clk_gated( + "pata", "ips", &clkregs->sccr1, 28); + } /* for PSCs there is a "registers" gate and a bitrate MCLK subtree */ - for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_psc_data); mclk_idx++) { + for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) { char name[12]; snprintf(name, sizeof(name), "psc%d", mclk_idx); clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated( @@ -625,19 +827,29 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) } clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips", &clkregs->sccr1, 15); - clks[MPC512x_CLK_SATA] = mpc512x_clk_gated("sata", "ips", - &clkregs->sccr1, 14); + if (soc_has_sata()) { + clks[MPC512x_CLK_SATA] = mpc512x_clk_gated( + "sata", "ips", &clkregs->sccr1, 14); + } clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips", &clkregs->sccr1, 13); - clks[MPC512x_CLK_PCI] = mpc512x_clk_gated("pci", "pci-ug", - &clkregs->sccr1, 11); + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI] = mpc512x_clk_gated( + "pci", "pci-ug", &clkregs->sccr1, 11); + } clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug", &clkregs->sccr1, 10); + if (soc_has_fec2()) { + clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated( + "fec2", "ips", &clkregs->sccr1, 9); + } clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug", &clkregs->sccr2, 31); - clks[MPC512x_CLK_AXE] = mpc512x_clk_gated("axe", "csb", - &clkregs->sccr2, 30); + if (soc_has_axe()) { + clks[MPC512x_CLK_AXE] = mpc512x_clk_gated( + "axe", "csb", &clkregs->sccr2, 30); + } clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips", &clkregs->sccr2, 29); clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb", @@ -654,21 +866,35 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug", &clkregs->sccr2, 24); /* there is only one SPDIF component, which shares MCLK support code */ - clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated("spdif", "ips", - &clkregs->sccr2, 23); - mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0); - clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated("mbx-bus", "mbx-bus-ug", - &clkregs->sccr2, 22); - clks[MPC512x_CLK_MBX] = mpc512x_clk_gated("mbx", "mbx-ug", - &clkregs->sccr2, 21); - clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated("mbx-3d", "mbx-3d-ug", - &clkregs->sccr2, 20); + if (soc_has_spdif()) { + clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated( + "spdif", "ips", &clkregs->sccr2, 23); + mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0); + } + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated( + "mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22); + clks[MPC512x_CLK_MBX] = mpc512x_clk_gated( + "mbx", "mbx-ug", &clkregs->sccr2, 21); + clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated( + "mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20); + } clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb", &clkregs->sccr2, 19); - clks[MPC512x_CLK_VIU] = mpc512x_clk_gated("viu", "csb", - &clkregs->sccr2, 18); - clks[MPC512x_CLK_SDHC_2] = mpc512x_clk_gated("sdhc-2", "sdhc-ug", - &clkregs->sccr2, 17); + if (soc_has_viu()) { + clks[MPC512x_CLK_VIU] = mpc512x_clk_gated( + "viu", "csb", &clkregs->sccr2, 18); + } + if (soc_has_sdhc2()) { + clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated( + "sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17); + } + + if (soc_has_outclk()) { + size_t idx; /* used as mclk_idx, just to trim line length */ + for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++) + mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx); + } /* * externally provided clocks (when implemented in hardware, @@ -678,10 +904,18 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) if (!freq) freq = 25000000; clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq); - freq = get_freq_from_dt("spdif_tx_in"); - clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed("spdif_tx_in", freq); - freq = get_freq_from_dt("spdif_rx_in"); - clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed("spdif_rx_in", freq); + if (soc_has_mclk_mux0_canin()) { + freq = get_freq_from_dt("can_clk_in"); + clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed( + "can_clk_in", freq); + } else { + freq = get_freq_from_dt("spdif_tx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_tx_in", freq); + freq = get_freq_from_dt("spdif_rx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_rx_in", freq); + } /* fixed frequency for AC97, always 24.567MHz */ clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000); @@ -883,6 +1117,20 @@ static void mpc5121_clk_provide_backwards_compat(void) NODE_PREP; NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC); } + /* + * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800; + * the clock items don't "form an array" since FEC2 was + * added only later and was not allowed to shift all other + * clock item indices, so the numbers aren't adjacent + */ + FOR_NODES("fsl,mpc5125-fec") { + NODE_PREP; + if (res.start & 0x4000) + idx = MPC512x_CLK_FEC2; + else + idx = MPC512x_CLK_FEC; + NODE_CHK("per", clks[idx], 0, FEC); + } FOR_NODES("fsl,mpc5121-usb2-dr") { NODE_PREP; @@ -932,6 +1180,9 @@ int __init mpc5121_clk_init(void) clkregs = of_iomap(clk_np, 0); WARN_ON(!clkregs); + /* determine the SoC variant we run on */ + mpc512x_clk_determine_soc(); + /* invalidate all not yet registered clock slots */ mpc512x_clk_preset_data(); diff --git a/include/dt-bindings/clock/mpc512x-clock.h b/include/dt-bindings/clock/mpc512x-clock.h index 9e81b3b99a32..4f94919327ce 100644 --- a/include/dt-bindings/clock/mpc512x-clock.h +++ b/include/dt-bindings/clock/mpc512x-clock.h @@ -63,7 +63,14 @@ #define MPC512x_CLK_PSC9 55 #define MPC512x_CLK_PSC10 56 #define MPC512x_CLK_PSC11 57 +#define MPC512x_CLK_SDHC2 58 +#define MPC512x_CLK_FEC2 59 +#define MPC512x_CLK_OUT0_CLK 60 +#define MPC512x_CLK_OUT1_CLK 61 +#define MPC512x_CLK_OUT2_CLK 62 +#define MPC512x_CLK_OUT3_CLK 63 +#define MPC512x_CLK_CAN_CLK_IN 64 -#define MPC512x_CLK_LAST_PUBLIC 57 +#define MPC512x_CLK_LAST_PUBLIC 64 #endif -- cgit v1.2.3 From c141611fb1ee2cfc374cf9be5327e97f361c4bed Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 9 Jan 2014 00:44:29 -0500 Subject: powerpc: Delete non-required instances of include None of these files are actually using any __init type directives and hence don't need to include . Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. The one instance where we add an include for init.h covers off a case where that file was implicitly getting it from another header which itself didn't need it. Signed-off-by: Paul Gortmaker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/paca.h | 1 - arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/include/asm/ps3.h | 1 - arch/powerpc/include/asm/vio.h | 1 - arch/powerpc/kernel/cacheinfo.c | 1 - arch/powerpc/kernel/crash.c | 1 - arch/powerpc/kernel/eeh_pe.c | 1 - arch/powerpc/kernel/head_64.S | 1 + arch/powerpc/kernel/hw_breakpoint.c | 1 - arch/powerpc/kernel/iomap.c | 1 - arch/powerpc/kernel/kgdb.c | 1 - arch/powerpc/kernel/process.c | 1 - arch/powerpc/kernel/smp-tbsync.c | 1 - arch/powerpc/kernel/syscalls.c | 1 - arch/powerpc/kernel/vdso32/vdso32_wrapper.S | 1 - arch/powerpc/kernel/vdso64/vdso64_wrapper.S | 1 - arch/powerpc/mm/pgtable.c | 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/mm/tlb_hash64.c | 1 - arch/powerpc/oprofile/op_model_7450.c | 1 - arch/powerpc/oprofile/op_model_cell.c | 1 - arch/powerpc/oprofile/op_model_fsl_emb.c | 1 - arch/powerpc/oprofile/op_model_pa6t.c | 1 - arch/powerpc/oprofile/op_model_power4.c | 1 - arch/powerpc/oprofile/op_model_rs64.c | 1 - arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 1 - arch/powerpc/platforms/83xx/suspend.c | 1 - arch/powerpc/platforms/85xx/sgy_cts1000.c | 1 - arch/powerpc/platforms/chrp/smp.c | 1 - arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 1 - arch/powerpc/platforms/pasemi/dma_lib.c | 1 - arch/powerpc/platforms/powermac/pfunc_core.c | 1 - arch/powerpc/platforms/powernv/eeh-ioda.c | 1 - arch/powerpc/platforms/pseries/cmm.c | 1 - arch/powerpc/platforms/pseries/dtl.c | 1 - arch/powerpc/sysdev/cpm2_pic.c | 1 - arch/powerpc/sysdev/fsl_ifc.c | 1 - arch/powerpc/sysdev/ge/ge_pic.h | 1 - arch/powerpc/sysdev/i8259.c | 1 - arch/powerpc/sysdev/mpc8xx_pic.c | 1 - arch/powerpc/sysdev/qe_lib/qe_io.c | 1 - arch/powerpc/sysdev/qe_lib/ucc.c | 1 - arch/powerpc/sysdev/qe_lib/ucc_fast.c | 1 - arch/powerpc/sysdev/qe_lib/ucc_slow.c | 1 - arch/powerpc/sysdev/udbg_memcons.c | 1 - arch/powerpc/sysdev/xics/icp-hv.c | 1 - 46 files changed, 1 insertion(+), 45 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index c3523d1dda58..68bee4636045 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -16,7 +16,6 @@ #ifdef CONFIG_PPC64 -#include #include #include #include diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index f595b98079ee..7689e0e98d33 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -4,7 +4,6 @@ #ifndef _ASM_POWERPC_PPC_ASM_H #define _ASM_POWERPC_PPC_ASM_H -#include #include #include #include diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index 678a7c1d9cb8..a1bc7e758422 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -21,7 +21,6 @@ #if !defined(_ASM_POWERPC_PS3_H) #define _ASM_POWERPC_PS3_H -#include #include #include #include diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 68d0cc998b1b..4f9b7ca0710f 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -15,7 +15,6 @@ #define _ASM_POWERPC_VIO_H #ifdef __KERNEL__ -#include #include #include #include diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 654932727873..abfa011344d9 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index fdcd8f551aff..18d7c80ddeb9 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f9450537e335..1feccd64f955 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4f0946de2d5c..b7363bd42452 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f0b47d1a6b0e..b0a1792279bb 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 97a3715ac8bd..b82227e7e21b 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -3,7 +3,6 @@ * * (C) Copyright 2004 Linus Torvalds */ -#include #include #include #include diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 83e89d310734..8504657379f1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3386d8ab7eb0..bf8e136316e2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index e68fd1ae727a..7a37ecd3afa3 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 4e3cc47f26b9..cd9be9aa016d 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 6e8f507ed32b..79683d0393f5 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,4 +1,3 @@ -#include #include #include diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index b8553d62b792..8df9e2463007 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,4 +1,3 @@ -#include #include #include diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ad90429bbd8b..c695943a513c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 02e8681fb865..7f0e872ab83d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 36e44b4260eb..c99f6510a0b2 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c index ff617246d128..d29b6e4e5e72 100644 --- a/arch/powerpc/oprofile/op_model_7450.c +++ b/arch/powerpc/oprofile/op_model_7450.c @@ -16,7 +16,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index b9589c19ccda..1f0ebdeea5f7 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c index 2a82d3ed464d..14cf86fdddab 100644 --- a/arch/powerpc/oprofile/op_model_fsl_emb.c +++ b/arch/powerpc/oprofile/op_model_fsl_emb.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c index 42f778dff919..a114a7c22d40 100644 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ b/arch/powerpc/oprofile/op_model_pa6t.c @@ -22,7 +22,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index f444b94935f5..962fe7b3e3fb 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index 9b801b8c8c5a..7e5b8ed3a1b7 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index fd71cfdf2380..e238b6a55b15 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -11,7 +11,6 @@ * (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 3d9716ccd327..4b4c081df94d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -10,7 +10,6 @@ * by the Free Software Foundation. */ -#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index b9197cea1854..bb75add67084 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index dead91b177b9..b6c9a0dcc924 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 6c03034dbbd3..c269caee58f9 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -15,7 +15,6 @@ #define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index f3defd8a2806..aafa01ba062f 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index d588e48dff74..43075081721f 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -5,7 +5,6 @@ * FIXME: LOCKING !!! */ -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 9b1db254898a..007ac4989841 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 1e561bef459b..2d8bf15879fd 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 5db66f1fbc26..7d61498e45c0 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -20,7 +20,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index 10386b676d87..a11bd1d433ad 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -27,7 +27,6 @@ */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_ifc.c b/arch/powerpc/sysdev/fsl_ifc.c index d7fc72239144..fbc885b31946 100644 --- a/arch/powerpc/sysdev/fsl_ifc.c +++ b/arch/powerpc/sysdev/fsl_ifc.c @@ -19,7 +19,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include #include diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h index 6149916da3f4..908dbd9826b6 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.h +++ b/arch/powerpc/sysdev/ge/ge_pic.h @@ -1,7 +1,6 @@ #ifndef __GEF_PIC_H__ #define __GEF_PIC_H__ -#include void gef_pic_cascade(unsigned int, struct irq_desc *); unsigned int gef_pic_get_irq(void); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 997df6a7ab5d..45598da0b321 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -8,7 +8,6 @@ */ #undef DEBUG -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index b724622c3a0b..c4828c0be5bd 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c index a88807b3dd57..d09994164daf 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_io.c +++ b/arch/powerpc/sysdev/qe_lib/qe_io.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c index 134b07d29435..621575b7e84a 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc.c +++ b/arch/powerpc/sysdev/qe_lib/ucc.c @@ -14,7 +14,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c index cceb2e366738..65aaf15032ae 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_fast.c @@ -13,7 +13,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c index 1c062f48f1ac..befaf1123f7f 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c @@ -13,7 +13,6 @@ * option) any later version. */ #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index ce5a7b489e4b..9998c0de12d0 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -18,7 +18,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index df0fc5821469..c1917cf67c3d 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 1d350544d5bf17d835d2850004c64ca51235c771 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 3 Jan 2014 17:47:12 +0800 Subject: powerpc/eeh: Add restore_config operation After reset on the specific PE or PHB, we never configure AER correctly on PowerNV platform. We needn't care it on pSeries platform. The patch introduces additional EEH operation eeh_ops:: restore_config() so that we have chance to configure AER correctly for PowerNV platform. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_pe.c | 3 +++ arch/powerpc/platforms/powernv/eeh-powernv.c | 3 ++- arch/powerpc/platforms/pseries/eeh_pseries.c | 4 +++- 4 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d3e5e9bc8f94..7f8adc848cd6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -157,6 +157,7 @@ struct eeh_ops { int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); + int (*restore_config)(struct device_node *dn); }; extern struct eeh_ops *eeh_ops; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 1feccd64f955..f0c353fa655a 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -736,6 +736,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) else eeh_restore_device_bars(edev, dn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(dn); + return NULL; } diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 73b981438cc5..ab91e6a34afa 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -359,7 +359,8 @@ static struct eeh_ops powernv_eeh_ops = { .configure_bridge = powernv_eeh_configure_bridge, .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, - .next_error = powernv_eeh_next_error + .next_error = powernv_eeh_next_error, + .restore_config = NULL }; /** diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ccb633e077b1..9ef3cc8ebc11 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -689,7 +689,9 @@ static struct eeh_ops pseries_eeh_ops = { .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, .read_config = pseries_eeh_read_config, - .write_config = pseries_eeh_write_config + .write_config = pseries_eeh_write_config, + .next_error = NULL, + .restore_config = NULL }; /** -- cgit v1.2.3 From 9be3becc2f99f4f2b1b697a616b1aa9e7889d68f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 3 Jan 2014 17:47:13 +0800 Subject: powerpc/eeh: Call opal_pci_reinit() on powernv for restoring config space The patch implements the EEH operation backend restore_config() for PowerNV platform. That relies on OPAL API opal_pci_reinit() where we reinitialize the error reporting properly after PE or PHB reset. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 8 ++++++-- arch/powerpc/platforms/powernv/eeh-powernv.c | 23 ++++++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a4041e9ed550..9a87b4401a41 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -344,12 +344,16 @@ enum OpalMveEnableAction { OPAL_ENABLE_MVE = 1 }; -enum OpalPciResetAndReinitScope { +enum OpalPciResetScope { OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, OPAL_PCI_IODA_TABLE_RESET = 6, }; +enum OpalPciReinitScope { + OPAL_REINIT_PCI_DEV = 1000 +}; + enum OpalPciResetState { OPAL_DEASSERT_RESET = 0, OPAL_ASSERT_RESET = 1 @@ -801,7 +805,7 @@ int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void *diag_buffer, int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer, uint64_t diag_buffer_len); int64_t opal_pci_fence_phb(uint64_t phb_id); -int64_t opal_pci_reinit(uint64_t phb_id, uint8_t reinit_scope); +int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data); int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action); int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); int64_t opal_get_epow_status(__be64 *status); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ab91e6a34afa..a79fddc5e74e 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -344,6 +344,27 @@ static int powernv_eeh_next_error(struct eeh_pe **pe) return -EEXIST; } +static int powernv_eeh_restore_config(struct device_node *dn) +{ + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct pnv_phb *phb; + s64 ret; + + if (!edev) + return -EEXIST; + + phb = edev->phb->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->config_addr); + if (ret) { + pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", + __func__, edev->config_addr, ret); + return -EIO; + } + + return 0; +} + static struct eeh_ops powernv_eeh_ops = { .name = "powernv", .init = powernv_eeh_init, @@ -360,7 +381,7 @@ static struct eeh_ops powernv_eeh_ops = { .read_config = pnv_pci_cfg_read, .write_config = pnv_pci_cfg_write, .next_error = powernv_eeh_next_error, - .restore_config = NULL + .restore_config = powernv_eeh_restore_config }; /** -- cgit v1.2.3 From f26c7a035b7f2f1a7505ce42e4ba946b12f7df91 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sun, 12 Jan 2014 14:13:45 +0800 Subject: powerpc/eeh: Hotplug improvement When EEH error comes to one specific PCI device before its driver is loaded, we will apply hotplug to recover the error. During the plug time, the PCI device will be probed and its driver is loaded. Then we wrongly calls to the error handlers if the driver supports EEH explicitly. The patch intends to fix by introducing flag EEH_DEV_NO_HANDLER and set it before we remove the PCI device. In turn, we can avoid wrongly calls the error handlers of the PCI device after its driver loaded. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 3 ++- arch/powerpc/kernel/eeh.c | 15 +++++++++++++++ arch/powerpc/kernel/eeh_driver.c | 10 +++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7f8adc848cd6..8b4b8e4a5c32 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -90,7 +90,8 @@ struct eeh_pe { #define EEH_DEV_IRQ_DISABLED (1 << 3) /* Interrupt disabled */ #define EEH_DEV_DISCONNECTED (1 << 4) /* Removing from PE */ -#define EEH_DEV_SYSFS (1 << 8) /* Sysfs created */ +#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ +#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ struct eeh_dev { int mode; /* EEH mode */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f4b7a227f183..148db72a8c43 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -921,6 +921,13 @@ void eeh_add_device_late(struct pci_dev *dev) eeh_sysfs_remove_device(edev->pdev); edev->mode &= ~EEH_DEV_SYSFS; + /* + * We definitely should have the PCI device removed + * though it wasn't correctly. So we needn't call + * into error handler afterwards. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + edev->pdev = NULL; dev->dev.archdata.edev = NULL; } @@ -1023,6 +1030,14 @@ void eeh_remove_device(struct pci_dev *dev) else edev->mode |= EEH_DEV_DISCONNECTED; + /* + * We're removing from the PCI subsystem, that means + * the PCI device driver can't support EEH or not + * well. So we rely on hotplug completely to do recovery + * for the specific PCI device. + */ + edev->mode |= EEH_DEV_NO_HANDLER; + eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); edev->mode &= ~EEH_DEV_SYSFS; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 4ef59c33777f..7db39203a073 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) if (!driver) return NULL; if (!driver->err_handler || - !driver->err_handler->mmio_enabled) { + !driver->err_handler->mmio_enabled || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->slot_reset) { + !driver->err_handler->slot_reset || + (edev->mode & EEH_DEV_NO_HANDLER)) { eeh_pcid_put(dev); return NULL; } @@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata) eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->resume) { + !driver->err_handler->resume || + (edev->mode & EEH_DEV_NO_HANDLER)) { + edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; } -- cgit v1.2.3 From d4edc5b6c480a0917e61d93d55531d7efa6230be Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Mon, 30 Dec 2013 17:05:34 +0530 Subject: powerpc: Fix the setup of CPU-to-Node mappings during CPU online On POWER platforms, the hypervisor can notify the guest kernel about dynamic changes in the cpu-numa associativity (VPHN topology update). Hence the cpu-to-node mappings that we got from the firmware during boot, may no longer be valid after such updates. This is handled using the arch_update_cpu_topology() hook in the scheduler, and the sched-domains are rebuilt according to the new mappings. But unfortunately, at the moment, CPU hotplug ignores these updated mappings and instead queries the firmware for the cpu-to-numa relationships and uses them during CPU online. So the kernel can end up assigning wrong NUMA nodes to CPUs during subsequent CPU hotplug online operations (after booting). Further, a particularly problematic scenario can result from this bug: On POWER platforms, the SMT mode can be switched between 1, 2, 4 (and even 8) threads per core. The switch to Single-Threaded (ST) mode is performed by offlining all except the first CPU thread in each core. Switching back to SMT mode involves onlining those other threads back, in each core. Now consider this scenario: 1. During boot, the kernel gets the cpu-to-node mappings from the firmware and assigns the CPUs to NUMA nodes appropriately, during CPU online. 2. Later on, the hypervisor updates the cpu-to-node mappings dynamically and communicates this update to the kernel. The kernel in turn updates its cpu-to-node associations and rebuilds its sched domains. Everything is fine so far. 3. Now, the user switches the machine from SMT to ST mode (say, by running ppc64_cpu --smt=1). This involves offlining all except 1 thread in each core. 4. The user then tries to switch back from ST to SMT mode (say, by running ppc64_cpu --smt=4), and this involves onlining those threads back. Since CPU hotplug ignores the new mappings, it queries the firmware and tries to associate the newly onlined sibling threads to the old NUMA nodes. This results in sibling threads within the same core getting associated with different NUMA nodes, which is incorrect. The scheduler's build-sched-domains code gets thoroughly confused with this and enters an infinite loop and causes soft-lockups, as explained in detail in commit 3be7db6ab (powerpc: VPHN topology change updates all siblings). So to fix this, use the numa_cpu_lookup_table to remember the updated cpu-to-node mappings, and use them during CPU hotplug online operations. Further, we also need to ensure that all threads in a core are assigned to a common NUMA node, irrespective of whether all those threads were online during the topology update. To achieve this, we take care not to use cpu_sibling_mask() since it is not hotplug invariant. Instead, we use cpu_first_sibling_thread() and set up the mappings manually using the 'threads_per_core' value for that particular platform. This helps us ensure that we don't hit this bug with any combination of CPU hotplug and SMT mode switching. Cc: stable@vger.kernel.org Signed-off-by: Srivatsa S. Bhat Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 10 +++++- arch/powerpc/mm/numa.c | 70 +++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 89e3ef2496ac..d0b5fca6b077 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -22,7 +22,15 @@ struct device_node; static inline int cpu_to_node(int cpu) { - return numa_cpu_lookup_table[cpu]; + int nid; + + nid = numa_cpu_lookup_table[cpu]; + + /* + * During early boot, the numa-cpu lookup table might not have been + * setup for all CPUs yet. In such cases, default to node 0. + */ + return (nid < 0) ? 0 : nid; } #define parent_node(node) (node) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 078d3e00a616..6847d509162f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -152,9 +154,22 @@ static void __init get_node_active_region(unsigned long pfn, } } -static void map_cpu_to_node(int cpu, int node) +static void reset_numa_cpu_lookup_table(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + numa_cpu_lookup_table[cpu] = -1; +} + +static void update_numa_cpu_lookup_table(unsigned int cpu, int node) { numa_cpu_lookup_table[cpu] = node; +} + +static void map_cpu_to_node(int cpu, int node) +{ + update_numa_cpu_lookup_table(cpu, node); dbg("adding cpu %d to node %d\n", cpu, node); @@ -522,11 +537,24 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = 0; - struct device_node *cpu = of_get_cpu_node(lcpu, NULL); + int nid; + struct device_node *cpu; + + /* + * If a valid cpu-to-node mapping is already available, use it + * directly instead of querying the firmware, since it represents + * the most recent mapping notified to us by the platform (eg: VPHN). + */ + if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { + map_cpu_to_node(lcpu, nid); + return nid; + } + + cpu = of_get_cpu_node(lcpu, NULL); if (!cpu) { WARN_ON(1); + nid = 0; goto out; } @@ -1067,6 +1095,7 @@ void __init do_init_bootmem(void) */ setup_node_to_cpumask_map(); + reset_numa_cpu_lookup_table(); register_cpu_notifier(&ppc64_numa_nb); cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, (void *)(unsigned long)boot_cpuid); @@ -1445,6 +1474,33 @@ static int update_cpu_topology(void *data) return 0; } +static int update_lookup_table(void *data) +{ + struct topology_update_data *update; + + if (!data) + return -EINVAL; + + /* + * Upon topology update, the numa-cpu lookup table needs to be updated + * for all threads in the core, including offline CPUs, to ensure that + * future hotplug operations respect the cpu-to-node associativity + * properly. + */ + for (update = data; update; update = update->next) { + int nid, base, j; + + nid = update->new_nid; + base = cpu_first_thread_sibling(update->cpu); + + for (j = 0; j < threads_per_core; j++) { + update_numa_cpu_lookup_table(base + j, nid); + } + } + + return 0; +} + /* * Update the node maps and sysfs entries for each cpu whose home node * has changed. Returns 1 when the topology has changed, and 0 otherwise. @@ -1513,6 +1569,14 @@ int arch_update_cpu_topology(void) stop_machine(update_cpu_topology, &updates[0], &updated_cpus); + /* + * Update the numa-cpu lookup table with the new mappings, even for + * offline CPUs. It is best to perform this update from the stop- + * machine context. + */ + stop_machine(update_lookup_table, &updates[0], + cpumask_of(raw_smp_processor_id())); + for (ud = &updates[0]; ud; ud = ud->next) { unregister_cpu_under_node(ud->cpu, ud->old_nid); register_cpu_under_node(ud->cpu, ud->new_nid); -- cgit v1.2.3 From 30c826358d10c1d6f8147de3310b97488daec830 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 14 Jan 2014 15:45:09 +0530 Subject: Move precessing of MCE queued event out from syscall exit path. Huge Dickins reported an issue that b5ff4211a829 "powerpc/book3s: Queue up and process delayed MCE events" breaks the PowerMac G5 boot. This patch fixes it by moving the mce even processing away from syscall exit, which was wrong to do that in first place, and using irq work framework to delay processing of mce event. Reported-by: Hugh Dickins Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mce.h | 1 - arch/powerpc/kernel/entry_64.S | 5 ----- arch/powerpc/kernel/mce.c | 13 ++++++++++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index a2b8c7b35fba..8e99edf6d966 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -191,7 +191,6 @@ extern void save_mce_event(struct pt_regs *regs, long handled, extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); -extern void machine_check_process_queued_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt); extern uint64_t get_mce_fault_addr(struct machine_check_event *evt); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 770d6d65c47b..bbfb0294b354 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -183,11 +183,6 @@ syscall_exit: #ifdef SHOW_SYSCALLS bl .do_show_syscall_exit ld r3,RESULT(r1) -#endif -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - bl .machine_check_process_queued_event -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif CURRENT_THREAD_INFO(r12, r1) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c0c52ec1fca7..cadef7e64e42 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -26,6 +26,7 @@ #include #include #include +#include #include static DEFINE_PER_CPU(int, mce_nest_count); @@ -35,6 +36,11 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); static DEFINE_PER_CPU(int, mce_queue_count); static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue); +static void machine_check_process_queued_event(struct irq_work *work); +struct irq_work mce_event_process_work = { + .func = machine_check_process_queued_event, +}; + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -185,17 +191,19 @@ void machine_check_queue_event(void) return; } __get_cpu_var(mce_event_queue[index]) = evt; + + /* Queue irq work to process this event later. */ + irq_work_queue(&mce_event_process_work); } /* * process pending MCE event from the mce event queue. This function will be * called during syscall exit. */ -void machine_check_process_queued_event(void) +static void machine_check_process_queued_event(struct irq_work *work) { int index; - preempt_disable(); /* * For now just print it to console. * TODO: log this error event to FSP or nvram. @@ -206,7 +214,6 @@ void machine_check_process_queued_event(void) &__get_cpu_var(mce_event_queue[index])); __get_cpu_var(mce_queue_count)--; } - preempt_enable(); } void machine_check_print_event_info(struct machine_check_event *evt) -- cgit v1.2.3 From ae39c58c2e56209ae3503286ffe2338d5e9a4bed Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jan 2014 15:56:28 +1100 Subject: powerpc: Reclaim two unused thread_info flag bits TIF_PERFMON_WORK and TIF_PERFMON_CTXSW are completely unused. They appear to be related to the old perfmon2 code, which has been superseded by the perf_event infrastructure. This removes their definitions so that the bits can be used for other purposes. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/thread_info.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9854c564ac52..fc2bf414c584 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -91,8 +91,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ -#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ -#define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ @@ -115,8 +113,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1< Date: Mon, 13 Jan 2014 15:56:29 +1100 Subject: powerpc: Don't corrupt transactional state when using FP/VMX in kernel Currently, when we have a process using the transactional memory facilities on POWER8 (that is, the processor is in transactional or suspended state), and the process enters the kernel and the kernel then uses the floating-point or vector (VMX/Altivec) facility, we end up corrupting the user-visible FP/VMX/VSX state. This happens, for example, if a page fault causes a copy-on-write operation, because the copy_page function will use VMX to do the copy on POWER8. The test program below demonstrates the bug. The bug happens because when FP/VMX state for a transactional process is stored in the thread_struct, we store the checkpointed state in .fp_state/.vr_state and the transactional (current) state in .transact_fp/.transact_vr. However, when the kernel wants to use FP/VMX, it calls enable_kernel_fp() or enable_kernel_altivec(), which saves the current state in .fp_state/.vr_state. Furthermore, when we return to the user process we return with FP/VMX/VSX disabled. The next time the process uses FP/VMX/VSX, we don't know which set of state (the current register values, .fp_state/.vr_state, or .transact_fp/.transact_vr) we should be using, since we have no way to tell if we are still in the same transaction, and if not, whether the previous transaction succeeded or failed. Thus it is necessary to strictly adhere to the rule that if FP has been enabled at any point in a transaction, we must keep FP enabled for the user process with the current transactional state in the FP registers, until we detect that it is no longer in a transaction. Similarly for VMX; once enabled it must stay enabled until the process is no longer transactional. In order to keep this rule, we add a new thread_info flag which we test when returning from the kernel to userspace, called TIF_RESTORE_TM. This flag indicates that there is FP/VMX/VSX state to be restored before entering userspace, and when it is set the .tm_orig_msr field in the thread_struct indicates what state needs to be restored. The restoration is done by restore_tm_state(). The TIF_RESTORE_TM bit is set by new giveup_fpu/altivec_maybe_transactional helpers, which are called from enable_kernel_fp/altivec, giveup_vsx, and flush_fp/altivec_to_thread instead of giveup_fpu/altivec. The other thing to be done is to get the transactional FP/VMX/VSX state from .fp_state/.vr_state when doing reclaim, if that state has been saved there by giveup_fpu/altivec_maybe_transactional. Having done this, we set the FP/VMX bit in the thread's MSR after reclaim to indicate that that part of the state is now valid (having been reclaimed from the processor's checkpointed state). Finally, in the signal handling code, we move the clearing of the transactional state bits in the thread's MSR a bit earlier, before calling flush_fp_to_thread(), so that we don't unnecessarily set the TIF_RESTORE_TM bit. This is the test program: /* Michael Neuling 4/12/2013 * * See if the altivec state is leaked out of an aborted transaction due to * kernel vmx copy loops. * * gcc -m64 htm_vmxcopy.c -o htm_vmxcopy * */ /* We don't use all of these, but for reference: */ int main(int argc, char *argv[]) { long double vecin = 1.3; long double vecout; unsigned long pgsize = getpagesize(); int i; int fd; int size = pgsize*16; char tmpfile[] = "/tmp/page_faultXXXXXX"; char buf[pgsize]; char *a; uint64_t aborted = 0; fd = mkstemp(tmpfile); assert(fd >= 0); memset(buf, 0, pgsize); for (i = 0; i < size; i += pgsize) assert(write(fd, buf, pgsize) == pgsize); unlink(tmpfile); a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); assert(a != MAP_FAILED); asm __volatile__( "lxvd2x 40,0,%[vecinptr] ; " // set 40 to initial value TBEGIN "beq 3f ;" TSUSPEND "xxlxor 40,40,40 ; " // set 40 to 0 "std 5, 0(%[map]) ;" // cause kernel vmx copy page TABORT TRESUME TEND "li %[res], 0 ;" "b 5f ;" "3: ;" // Abort handler "li %[res], 1 ;" "5: ;" "stxvd2x 40,0,%[vecoutptr] ; " : [res]"=r"(aborted) : [vecinptr]"r"(&vecin), [vecoutptr]"r"(&vecout), [map]"r"(a) : "memory", "r0", "r3", "r4", "r5", "r6", "r7"); if (aborted && (vecin != vecout)){ printf("FAILED: vector state leaked on abort %f != %f\n", (double)vecin, (double)vecout); exit(1); } munmap(a, size); close(fd); printf("PASSED!\n"); return 0; } Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 2 + arch/powerpc/include/asm/thread_info.h | 5 +- arch/powerpc/include/asm/tm.h | 1 + arch/powerpc/kernel/entry_64.S | 12 ++- arch/powerpc/kernel/fpu.S | 16 ++++ arch/powerpc/kernel/process.c | 146 ++++++++++++++++++++++++++++++--- arch/powerpc/kernel/signal.c | 3 +- arch/powerpc/kernel/signal_32.c | 21 ++--- arch/powerpc/kernel/signal_64.c | 14 ++-- arch/powerpc/kernel/traps.c | 12 +-- arch/powerpc/kernel/vector.S | 10 +++ 11 files changed, 195 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fc14a38c7ccf..232a2fa5b483 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -373,6 +373,8 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); +extern void fp_enable(void); +extern void vec_enable(void); extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index fc2bf414c584..b034ecdb7c74 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -91,6 +91,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ +#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ @@ -113,6 +114,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1<thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_fpu(tsk); +} + +void giveup_altivec_maybe_transactional(struct task_struct *tsk) +{ + /* + * If we are saving the current thread's registers, and the + * thread is in a transactional state, set the TIF_RESTORE_TM + * bit so that we know to restore the registers before + * returning to userspace. + */ + if (tsk == current && tsk->thread.regs && + MSR_TM_ACTIVE(tsk->thread.regs->msr) && + !test_thread_flag(TIF_RESTORE_TM)) { + tsk->thread.tm_orig_msr = tsk->thread.regs->msr; + set_thread_flag(TIF_RESTORE_TM); + } + + giveup_altivec(tsk); +} + +#else +#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk) +#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk) +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + #ifdef CONFIG_PPC_FPU /* * Make sure the floating-point register state in the @@ -101,13 +143,13 @@ void flush_fp_to_thread(struct task_struct *tsk) */ BUG_ON(tsk != current); #endif - giveup_fpu(tsk); + giveup_fpu_maybe_transactional(tsk); } preempt_enable(); } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif +#endif /* CONFIG_PPC_FPU */ void enable_kernel_fp(void) { @@ -115,11 +157,11 @@ void enable_kernel_fp(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); + giveup_fpu_maybe_transactional(current); else giveup_fpu(NULL); /* just enables FP for kernel */ #else - giveup_fpu(last_task_used_math); + giveup_fpu_maybe_transactional(last_task_used_math); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_fp); @@ -131,11 +173,11 @@ void enable_kernel_altivec(void) #ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) - giveup_altivec(current); + giveup_altivec_maybe_transactional(current); else giveup_altivec_notask(); #else - giveup_altivec(last_task_used_altivec); + giveup_altivec_maybe_transactional(last_task_used_altivec); #endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_altivec); @@ -152,7 +194,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) #ifdef CONFIG_SMP BUG_ON(tsk != current); #endif - giveup_altivec(tsk); + giveup_altivec_maybe_transactional(tsk); } preempt_enable(); } @@ -181,8 +223,8 @@ EXPORT_SYMBOL(enable_kernel_vsx); void giveup_vsx(struct task_struct *tsk) { - giveup_fpu(tsk); - giveup_altivec(tsk); + giveup_fpu_maybe_transactional(tsk); + giveup_altivec_maybe_transactional(tsk); __giveup_vsx(tsk); } @@ -478,7 +520,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, return false; return true; } + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void tm_reclaim_thread(struct thread_struct *thr, + struct thread_info *ti, uint8_t cause) +{ + unsigned long msr_diff = 0; + + /* + * If FP/VSX registers have been already saved to the + * thread_struct, move them to the transact_fp array. + * We clear the TIF_RESTORE_TM bit since after the reclaim + * the thread will no longer be transactional. + */ + if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { + msr_diff = thr->tm_orig_msr & ~thr->regs->msr; + if (msr_diff & MSR_FP) + memcpy(&thr->transact_fp, &thr->fp_state, + sizeof(struct thread_fp_state)); + if (msr_diff & MSR_VEC) + memcpy(&thr->transact_vr, &thr->vr_state, + sizeof(struct thread_vr_state)); + clear_ti_thread_flag(ti, TIF_RESTORE_TM); + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; + } + + tm_reclaim(thr, thr->regs->msr, cause); + + /* Having done the reclaim, we now have the checkpointed + * FP/VSX values in the registers. These might be valid + * even if we have previously called enable_kernel_fp() or + * flush_fp_to_thread(), so update thr->regs->msr to + * indicate their current validity. + */ + thr->regs->msr |= msr_diff; +} + +void tm_reclaim_current(uint8_t cause) +{ + tm_enable(); + tm_reclaim_thread(¤t->thread, current_thread_info(), cause); +} + static inline void tm_reclaim_task(struct task_struct *tsk) { /* We have to work out if we're switching from/to a task that's in the @@ -501,9 +584,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk) /* Stash the original thread MSR, as giveup_fpu et al will * modify it. We hold onto it to see whether the task used - * FP & vector regs. + * FP & vector regs. If the TIF_RESTORE_TM flag is set, + * tm_orig_msr is already set. */ - thr->tm_orig_msr = thr->regs->msr; + if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) + thr->tm_orig_msr = thr->regs->msr; TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", @@ -511,7 +596,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) thr->regs->ccr, thr->regs->msr, thr->regs->trap); - tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); + tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED); TM_DEBUG("--- tm_reclaim on pid %d complete\n", tsk->pid); @@ -587,6 +672,43 @@ static inline void __switch_to_tm(struct task_struct *prev) tm_reclaim_task(prev); } } + +/* + * This is called if we are on the way out to userspace and the + * TIF_RESTORE_TM flag is set. It checks if we need to reload + * FP and/or vector state and does so if necessary. + * If userspace is inside a transaction (whether active or + * suspended) and FP/VMX/VSX instructions have ever been enabled + * inside that transaction, then we have to keep them enabled + * and keep the FP/VMX/VSX state loaded while ever the transaction + * continues. The reason is that if we didn't, and subsequently + * got a FP/VMX/VSX unavailable interrupt inside a transaction, + * we don't know whether it's the same transaction, and thus we + * don't know which of the checkpointed state and the transactional + * state to use. + */ +void restore_tm_state(struct pt_regs *regs) +{ + unsigned long msr_diff; + + clear_thread_flag(TIF_RESTORE_TM); + if (!MSR_TM_ACTIVE(regs->msr)) + return; + + msr_diff = current->thread.tm_orig_msr & ~regs->msr; + msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; + if (msr_diff & MSR_FP) { + fp_enable(); + load_fp_state(¤t->thread.fp_state); + regs->msr |= current->thread.fpexc_mode; + } + if (msr_diff & MSR_VEC) { + vec_enable(); + load_vr_state(¤t->thread.vr_state); + } + regs->msr |= msr_diff; +} + #else #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 457e97aa2945..8fc4177ed65a 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) { - tm_enable(); - tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(regs->msr)) return current->thread.ckpt_regs.gpr[1]; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 68027bfa5f8e..6ce69e6f1fcb 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -519,6 +519,13 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. + */ + regs->msr &= ~MSR_TS_MASK; + /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -1056,13 +1063,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif return 1; badframe: @@ -1484,13 +1484,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, regs->nip = (unsigned long) ka->sa.sa_handler; /* enter the signal handler in big-endian mode */ regs->msr &= ~MSR_LE; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif return 1; badframe: diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 42991045349f..e35bf773df7a 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,6 +192,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext + * just indicates to userland that we were doing a transaction, but we + * don't want to return in transactional state. This also ensures + * that flush_fp_to_thread won't set TIF_RESTORE_TM again. + */ + regs->msr &= ~MSR_TS_MASK; + flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -749,13 +756,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, /* Make sure signal handler doesn't get spurious FP exceptions */ current->thread.fp_state.fpscr = 0; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state: - */ - regs->msr &= ~MSR_TS_MASK; -#endif /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 330841766b09..26e1358ff0bc 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1399,7 +1399,6 @@ void fp_unavailable_tm(struct pt_regs *regs) TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); /* We can only have got here if the task started using FP after * beginning the transaction. So, the transactional regs are just a @@ -1408,8 +1407,7 @@ void fp_unavailable_tm(struct pt_regs *regs) * transaction, and probably retry but now with FP enabled. So the * checkpointed FP registers need to be loaded. */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); /* Reclaim didn't save out any FPRs to transact_fprs. */ /* Enable FP for the task: */ @@ -1432,9 +1430,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC; tm_recheckpoint(¤t->thread, regs->msr); current->thread.used_vr = 1; @@ -1455,10 +1451,8 @@ void vsx_unavailable_tm(struct pt_regs *regs) "MSR=%lx\n", regs->nip, regs->msr); - tm_enable(); /* This reclaims FP and/or VR regs if they're already enabled */ - tm_reclaim(¤t->thread, current->thread.regs->msr, - TM_CAUSE_FAC_UNAV); + tm_reclaim_current(TM_CAUSE_FAC_UNAV); regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | MSR_VSX; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 0458a9aaba9d..74f8050518d6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -36,6 +36,16 @@ _GLOBAL(do_load_up_transact_altivec) blr #endif +/* + * Enable use of VMX/Altivec for the caller. + */ +_GLOBAL(vec_enable) + mfmsr r3 + oris r3,r3,MSR_VEC@h + MTMSRD(r3) + isync + blr + /* * Load state from memory into VMX registers including VSCR. * Assumes the caller has enabled VMX in the MSR. -- cgit v1.2.3 From b3084f4db3aeb991c507ca774337c7e7893ed04f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 13 Jan 2014 11:34:24 +0530 Subject: powerpc/thp: Fix crash on mremap This patch fix the below crash NIP [c00000000004cee4] .__hash_page_thp+0x2a4/0x440 LR [c0000000000439ac] .hash_page+0x18c/0x5e0 ... Call Trace: [c000000736103c40] [00001ffffb000000] 0x1ffffb000000(unreliable) [437908.479693] [c000000736103d50] [c0000000000439ac] .hash_page+0x18c/0x5e0 [437908.479699] [c000000736103e30] [c00000000000924c] .do_hash_page+0x4c/0x58 On ppc64 we use the pgtable for storing the hpte slot information and store address to the pgtable at a constant offset (PTRS_PER_PMD) from pmd. On mremap, when we switch the pmd, we need to withdraw and deposit the pgtable again, so that we find the pgtable at PTRS_PER_PMD offset from new pmd. We also want to move the withdraw and deposit before the set_pmd so that, when page fault find the pmd as trans huge we can be sure that pgtable can be located at the offset. Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable-ppc64.h | 14 ++++++++++++++ include/asm-generic/pgtable.h | 12 ++++++++++++ mm/huge_memory.c | 14 +++++--------- 3 files changed, 31 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 4a191c472867..d27960c89a71 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -558,5 +558,19 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_INVALIDATE extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); + +#define pmd_move_must_withdraw pmd_move_must_withdraw +typedef struct spinlock spinlock_t; +static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, + spinlock_t *old_pmd_ptl) +{ + /* + * Archs like ppc64 use pgtable to store per pmd + * specific information. So when we switch the pmd, + * we should also withdraw and deposit the pgtable + */ + return true; +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index db0923458940..8e4f41d9af4d 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -558,6 +558,18 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) } #endif +#ifndef pmd_move_must_withdraw +static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, + spinlock_t *old_pmd_ptl) +{ + /* + * With split pmd lock we also need to move preallocated + * PTE page table if new_pmd is on different PMD page table. + */ + return new_pmd_ptl != old_pmd_ptl; +} +#endif + /* * This function is meant to be used by sites walking pagetables with * the mmap_sem hold in read mode to protect against MADV_DONTNEED and diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 95d1acb0f3d2..5d80c53b87cb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1502,19 +1502,15 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); - set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - if (new_ptl != old_ptl) { - pgtable_t pgtable; - /* - * Move preallocated PTE page table if new_pmd is on - * different PMD page table. - */ + if (pmd_move_must_withdraw(new_ptl, old_ptl)) { + pgtable_t pgtable; pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); pgtable_trans_huge_deposit(mm, new_pmd, pgtable); - - spin_unlock(new_ptl); } + set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); + if (new_ptl != old_ptl) + spin_unlock(new_ptl); spin_unlock(old_ptl); } out: -- cgit v1.2.3 From 7e4e7867b1e551b7b8f326da3604c47332972bc6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 15 Jan 2014 13:16:11 +0800 Subject: powerpc/eeh: Handle multiple EEH errors For one PCI error relevant OPAL event, we possibly have multiple EEH errors for that. For example, multiple frozen PEs detected on different PHBs. Unfortunately, we didn't cover the case. The patch enumarates the return value from eeh_ops::next_error() and change eeh_handle_special_event() and eeh_ops::next_error() to handle all existing EEH errors. As Ben pointed out, we needn't list_for_each_entry_safe() since we are not deleting any PHB from the hose_list and the EEH serialized lock should be held while purging EEH events. The patch covers those suggestions as well. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/eeh.h | 10 ++ arch/powerpc/kernel/eeh_driver.c | 150 ++++++++++++++++-------------- arch/powerpc/platforms/powernv/eeh-ioda.c | 39 +++++--- 3 files changed, 112 insertions(+), 87 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 8b4b8e4a5c32..9e39ceb1d19f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -118,6 +118,16 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) return edev ? edev->pdev : NULL; } +/* Return values from eeh_ops::next_error */ +enum { + EEH_NEXT_ERR_NONE = 0, + EEH_NEXT_ERR_INF, + EEH_NEXT_ERR_FROZEN_PE, + EEH_NEXT_ERR_FENCED_PHB, + EEH_NEXT_ERR_DEAD_PHB, + EEH_NEXT_ERR_DEAD_IOC +}; + /* * The struct is used to trace the registered EEH operation * callback functions. Actually, those operation callback diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7db39203a073..baa3b06e6dab 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -630,84 +630,90 @@ static void eeh_handle_special_event(void) { struct eeh_pe *pe, *phb_pe; struct pci_bus *bus; - struct pci_controller *hose, *tmp; + struct pci_controller *hose; unsigned long flags; - int rc = 0; + int rc; - /* - * The return value from next_error() has been classified as follows. - * It might be good to enumerate them. However, next_error() is only - * supported by PowerNV platform for now. So it would be fine to use - * integer directly: - * - * 4 - Dead IOC 3 - Dead PHB - * 2 - Fenced PHB 1 - Frozen PE - * 0 - No error found - * - */ - rc = eeh_ops->next_error(&pe); - if (rc <= 0) - return; - switch (rc) { - case 4: - /* Mark all PHBs in dead state */ - eeh_serialize_lock(&flags); - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe) continue; - - eeh_pe_state_mark(phb_pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + do { + rc = eeh_ops->next_error(&pe); + + switch (rc) { + case EEH_NEXT_ERR_DEAD_IOC: + /* Mark all PHBs in dead state */ + eeh_serialize_lock(&flags); + + /* Purge all events */ + eeh_remove_event(NULL); + + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) continue; + + eeh_pe_state_mark(phb_pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + } + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_FROZEN_PE: + case EEH_NEXT_ERR_FENCED_PHB: + case EEH_NEXT_ERR_DEAD_PHB: + /* Mark the PE in fenced state */ + eeh_serialize_lock(&flags); + + /* Purge all events of the PHB */ + eeh_remove_event(pe); + + if (rc == EEH_NEXT_ERR_DEAD_PHB) + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); + else + eeh_pe_state_mark(pe, + EEH_PE_ISOLATED | EEH_PE_RECOVERING); + + eeh_serialize_unlock(flags); + + break; + case EEH_NEXT_ERR_NONE: + return; + default: + pr_warn("%s: Invalid value %d from next_error()\n", + __func__, rc); + return; } - eeh_serialize_unlock(flags); - - /* Purge all events */ - eeh_remove_event(NULL); - break; - case 3: - case 2: - case 1: - /* Mark the PE in fenced state */ - eeh_serialize_lock(&flags); - if (rc == 3) - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); - else - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_RECOVERING); - eeh_serialize_unlock(flags); - - /* Purge all events of the PHB */ - eeh_remove_event(pe); - break; - default: - pr_err("%s: Invalid value %d from next_error()\n", - __func__, rc); - return; - } - /* - * For fenced PHB and frozen PE, it's handled as normal - * event. We have to remove the affected PHBs for dead - * PHB and IOC - */ - if (rc == 2 || rc == 1) - eeh_handle_normal_event(pe); - else { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) - continue; - - bus = eeh_pe_bus_get(phb_pe); - /* Notify all devices that they're about to go down. */ - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - pcibios_remove_pci_devices(bus); + /* + * For fenced PHB and frozen PE, it's handled as normal + * event. We have to remove the affected PHBs for dead + * PHB and IOC + */ + if (rc == EEH_NEXT_ERR_FROZEN_PE || + rc == EEH_NEXT_ERR_FENCED_PHB) { + eeh_handle_normal_event(pe); + } else { + list_for_each_entry(hose, &hose_list, list_node) { + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || + !(phb_pe->state & EEH_PE_PHB_DEAD)) + continue; + + /* Notify all devices to be down */ + bus = eeh_pe_bus_get(phb_pe); + eeh_pe_dev_traverse(pe, + eeh_report_failure, NULL); + pcibios_remove_pci_devices(bus); + } } - } + + /* + * If we have detected dead IOC, we needn't proceed + * any more since all PHBs would have been removed + */ + if (rc == EEH_NEXT_ERR_DEAD_IOC) + break; + } while (rc != EEH_NEXT_ERR_NONE); } /** diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 007ac4989841..8dd16f4675a2 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -735,12 +735,12 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, */ static int ioda_eeh_next_error(struct eeh_pe **pe) { - struct pci_controller *hose, *tmp; + struct pci_controller *hose; struct pnv_phb *phb; u64 frozen_pe_no; u16 err_type, severity; long rc; - int ret = 1; + int ret = EEH_NEXT_ERR_NONE; /* * While running here, it's safe to purge the event queue. @@ -750,7 +750,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) eeh_remove_event(NULL); opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + list_for_each_entry(hose, &hose_list, list_node) { /* * If the subordinate PCI buses of the PHB has been * removed, we needn't take care of it any more. @@ -789,19 +789,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) switch (err_type) { case OPAL_EEH_IOC_ERROR: if (severity == OPAL_EEH_SEV_IOC_DEAD) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { + list_for_each_entry(hose, &hose_list, + list_node) { phb = hose->private_data; phb->eeh_state |= PNV_EEH_STATE_REMOVED; } pr_err("EEH: dead IOC detected\n"); - ret = 4; - goto out; + ret = EEH_NEXT_ERR_DEAD_IOC; } else if (severity == OPAL_EEH_SEV_INF) { pr_info("EEH: IOC informative error " "detected\n"); ioda_eeh_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; } break; @@ -813,21 +813,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) pr_err("EEH: dead PHB#%x detected\n", hose->global_number); phb->eeh_state |= PNV_EEH_STATE_REMOVED; - ret = 3; - goto out; + ret = EEH_NEXT_ERR_DEAD_PHB; } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { if (ioda_eeh_get_phb_pe(hose, pe)) break; pr_err("EEH: fenced PHB#%x detected\n", hose->global_number); - ret = 2; - goto out; + ret = EEH_NEXT_ERR_FENCED_PHB; } else if (severity == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " "detected\n", hose->global_number); ioda_eeh_phb_diag(hose); + ret = EEH_NEXT_ERR_NONE; } break; @@ -837,13 +836,23 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); - ret = 1; - goto out; + ret = EEH_NEXT_ERR_FROZEN_PE; + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, err_type); } + + /* + * If we have no errors on the specific PHB or only + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by upper + * layer. + */ + if (ret > EEH_NEXT_ERR_INF) + break; } - ret = 0; -out: return ret; } -- cgit v1.2.3 From f7d98d18a01ece2863984d4fb5ae949b18b02715 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 15 Jan 2014 17:02:04 +1100 Subject: powerpc/powernv: Call OPAL sync before kexec'ing Its possible that OPAL may be writing to host memory during kexec (like dump retrieve scenario). In this situation we might end up corrupting host memory. This patch makes OPAL sync call to make sure OPAL stops writing to host memory before kexec'ing. Signed-off-by: Vasant Hegde Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 16 ++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 6 ++++-- 4 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9a87b4401a41..40157e2ca691 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -156,6 +156,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_FLASH_UPDATE 78 #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 +#define OPAL_SYNC_HOST_REBOOT 87 #ifndef __ASSEMBLY__ @@ -828,6 +829,7 @@ int64_t opal_update_flash(uint64_t blk_list); int64_t opal_get_msg(uint64_t buffer, size_t size); int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token); +int64_t opal_sync_host_reboot(void); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 719aa5c325c6..3e8829c40fbb 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -128,3 +128,4 @@ OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_get_msg, OPAL_GET_MSG); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); +OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 7a184a0ff183..65499adaecff 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -482,10 +483,25 @@ subsys_initcall(opal_init); void opal_shutdown(void) { unsigned int i; + long rc = OPAL_BUSY; + /* First free interrupts, which will also mask them */ for (i = 0; i < opal_irq_count; i++) { if (opal_irqs[i]) free_irq(opal_irqs[i], NULL); opal_irqs[i] = 0; } + + /* + * Then sync with OPAL which ensure anything that can + * potentially write to our memory has completed such + * as an ongoing dump retrieval + */ + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_sync_host_reboot(); + if (rc == OPAL_BUSY) + opal_poll_events(NULL); + else + mdelay(10); + } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 19884b2a51b4..a932feb2901c 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -145,8 +145,10 @@ static void pnv_shutdown(void) /* Let the PCI code clear up IODA tables */ pnv_pci_shutdown(); - /* And unregister all OPAL interrupts so they don't fire - * up while we kexec + /* + * Stop OPAL activity: Unregister all OPAL interrupts so they + * don't fire up while we kexec and make sure all potentially + * DMA'ing ops are complete (such as dump retrieval). */ opal_shutdown(); } -- cgit v1.2.3 From 9494a1e8428ea8e60ae77b221b9d32a5edc21ef4 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 23 Jan 2014 15:53:55 -0800 Subject: powerpc: use generic fixmap.h Signed-off-by: Mark Salter Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/fixmap.h | 44 ++------------------------------------- 1 file changed, 2 insertions(+), 42 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 5c2c0233175e..90f604bbcd19 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -58,52 +58,12 @@ enum fixed_addresses { extern void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#define set_fixmap(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NCG) - -#define clear_fixmap(idx) \ - __set_fixmap(idx, 0, __pgprot(0)) - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) - -extern void __this_fixmap_does_not_exist(void); - -/* - * 'index to address' translation. If anyone tries to use the idx - * directly without tranlation, we catch the bug with a NULL-deference - * kernel oops. Illegal ranges of incoming indices are caught too. - */ -static __always_inline unsigned long fix_to_virt(const unsigned int idx) -{ - /* - * this branch gets completely eliminated after inlining, - * except when someone tries to use fixaddr indices in an - * illegal way. (such as mixing up address types or using - * out-of-range indices). - * - * If it doesn't get removed, the linker will complain - * loudly with a reasonably clear error message.. - */ - if (idx >= __end_of_fixed_addresses) - __this_fixmap_does_not_exist(); - - return __fix_to_virt(idx); -} +#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG -static inline unsigned long virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +#include #endif /* !__ASSEMBLY__ */ #endif -- cgit v1.2.3 From 736017752d2f6ed0d64f5e15cf48e79779b11c85 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 9 Jan 2014 11:51:16 +0100 Subject: KVM: PPC: Book3S: MMIO emulation support for little endian guests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MMIO emulation reads the last instruction executed by the guest and then emulates. If the guest is running in Little Endian order, or more generally in a different endian order of the host, the instruction needs to be byte-swapped before being emulated. This patch adds a helper routine which tests the endian order of the host and the guest in order to decide whether a byteswap is needed or not. It is then used to byteswap the last instruction of the guest in the endian order of the host before MMIO emulation is performed. Finally, kvmppc_handle_load() of kvmppc_handle_store() are modified to reverse the endianness of the MMIO if required. Signed-off-by: Cédric Le Goater [agraf: add booke handling] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 8 +++++++- arch/powerpc/include/asm/kvm_booke.h | 6 ++++++ arch/powerpc/include/asm/kvm_ppc.h | 7 ++++--- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/emulate.c | 1 - arch/powerpc/kvm/powerpc.c | 28 ++++++++++++++++++++++++---- 6 files changed, 42 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f8b23201c105..1e9c26f45d18 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -264,6 +264,11 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.pc; } +static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); +} + static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) { /* Load the instruction manually if it failed to do so in the @@ -271,7 +276,8 @@ static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - return vcpu->arch.last_inst; + return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; } static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index dd8f61510dfd..80d46b5a7efb 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -63,6 +63,12 @@ static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) return vcpu->arch.xer; } +static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) +{ + /* XXX Would need to check TLB entry */ + return false; +} + static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) { return vcpu->arch.last_inst; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c8317fbf92c4..629277df4798 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -54,12 +54,13 @@ extern void kvmppc_handler_highmem(void); extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, - int is_bigendian); + int is_default_endian); extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, - int is_bigendian); + int is_default_endian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u64 val, unsigned int bytes, int is_bigendian); + u64 val, unsigned int bytes, + int is_default_endian); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3ff587a8b7d..efb8aa544876 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -558,7 +558,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) + if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) return RESUME_GUEST; /* diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 2f9a0873b44f..c2b887be2c29 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -219,7 +219,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) * lmw * stmw * - * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7ca9e0a80499..026dfaaa4772 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -673,9 +673,19 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, unsigned int bytes, int is_bigendian) + unsigned int rt, unsigned int bytes, + int is_default_endian) { int idx, ret; + int is_bigendian; + + if (kvmppc_need_byteswap(vcpu)) { + /* Default endianness is "little endian". */ + is_bigendian = !is_default_endian; + } else { + /* Default endianness is "big endian". */ + is_bigendian = is_default_endian; + } if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, @@ -711,21 +721,31 @@ EXPORT_SYMBOL_GPL(kvmppc_handle_load); /* Same as above, but sign extends */ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, unsigned int bytes, int is_bigendian) + unsigned int rt, unsigned int bytes, + int is_default_endian) { int r; vcpu->arch.mmio_sign_extend = 1; - r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); + r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian); return r; } int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u64 val, unsigned int bytes, int is_bigendian) + u64 val, unsigned int bytes, int is_default_endian) { void *data = run->mmio.data; int idx, ret; + int is_bigendian; + + if (kvmppc_need_byteswap(vcpu)) { + /* Default endianness is "little endian". */ + is_bigendian = !is_default_endian; + } else { + /* Default endianness is "big endian". */ + is_bigendian = is_default_endian; + } if (bytes > sizeof(run->mmio.data)) { printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, -- cgit v1.2.3 From 6c85f52b10fd60e45c6e30c5b85d116406bd3c9b Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 9 Jan 2014 19:18:40 -0600 Subject: kvm/ppc: IRQ disabling cleanup Simplify the handling of lazy EE by going directly from fully-enabled to hard-disabled. This replaces the lazy_irq_pending() check (including its misplaced kvm_guest_exit() call). As suggested by Tiejun Chen, move the interrupt disabling into kvmppc_prepare_to_enter() rather than have each caller do it. Also move the IRQ enabling on heavyweight exit into kvmppc_prepare_to_enter(). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++++ arch/powerpc/kvm/book3s_pr.c | 14 ++++++-------- arch/powerpc/kvm/booke.c | 12 +++++------- arch/powerpc/kvm/powerpc.c | 26 +++++++++----------------- 4 files changed, 26 insertions(+), 32 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 629277df4798..fcd53f0d34ba 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -456,6 +456,12 @@ static inline void kvmppc_fix_ee_before_entry(void) trace_hardirqs_on(); #ifdef CONFIG_PPC64 + /* + * To avoid races, the caller must have gone directly from having + * interrupts fully-enabled to hard-disabled. + */ + WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS); + /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; local_paca->soft_enabled = 1; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index aedba681bb94..e82fafdaf880 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -999,14 +999,14 @@ program_interrupt: * and if we really did time things so badly, then we just exit * again due to a host external interrupt. */ - local_irq_disable(); s = kvmppc_prepare_to_enter(vcpu); - if (s <= 0) { - local_irq_enable(); + if (s <= 0) r = s; - } else { + else { + /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); } + kvmppc_handle_lost_ext(vcpu); } @@ -1219,12 +1219,10 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * really did time things so badly, then we just exit again due to * a host external interrupt. */ - local_irq_disable(); ret = kvmppc_prepare_to_enter(vcpu); - if (ret <= 0) { - local_irq_enable(); + if (ret <= 0) goto out; - } + /* interrupts now hard-disabled */ /* Save FPU state in thread_struct */ if (current->thread.regs->msr & MSR_FP) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6a8c32ec4173..07b89c711898 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -643,7 +643,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) local_irq_enable(); kvm_vcpu_block(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); - local_irq_disable(); + hard_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); r = 1; @@ -688,13 +688,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINVAL; } - local_irq_disable(); s = kvmppc_prepare_to_enter(vcpu); if (s <= 0) { - local_irq_enable(); ret = s; goto out; } + /* interrupts now hard-disabled */ #ifdef CONFIG_PPC_FPU /* Save userspace FPU state in stack */ @@ -1187,12 +1186,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * aren't already exiting to userspace for some other reason. */ if (!(r & RESUME_HOST)) { - local_irq_disable(); s = kvmppc_prepare_to_enter(vcpu); - if (s <= 0) { - local_irq_enable(); + if (s <= 0) r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - } else { + else { + /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 026dfaaa4772..3cf541a53e2a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -68,14 +68,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) */ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { - int r = 1; + int r; + + WARN_ON(irqs_disabled()); + hard_irq_disable(); - WARN_ON_ONCE(!irqs_disabled()); while (true) { if (need_resched()) { local_irq_enable(); cond_resched(); - local_irq_disable(); + hard_irq_disable(); continue; } @@ -101,7 +103,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) local_irq_enable(); trace_kvm_check_requests(vcpu); r = kvmppc_core_check_requests(vcpu); - local_irq_disable(); + hard_irq_disable(); if (r > 0) continue; break; @@ -113,22 +115,12 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } -#ifdef CONFIG_PPC64 - /* lazy EE magic */ - hard_irq_disable(); - if (lazy_irq_pending()) { - /* Got an interrupt in between, try again */ - local_irq_enable(); - local_irq_disable(); - kvm_guest_exit(); - continue; - } -#endif - kvm_guest_enter(); - break; + return 1; } + /* return to host */ + local_irq_enable(); return r; } EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); -- cgit v1.2.3 From e0b7ec058c0eb7ba8d5d937d81de2bd16db6970e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:20 +1100 Subject: KVM: PPC: Book3S HV: Align physical and virtual CPU thread numbers On a threaded processor such as POWER7, we group VCPUs into virtual cores and arrange that the VCPUs in a virtual core run on the same physical core. Currently we don't enforce any correspondence between virtual thread numbers within a virtual core and physical thread numbers. Physical threads are allocated starting at 0 on a first-come first-served basis to runnable virtual threads (VCPUs). POWER8 implements a new "msgsndp" instruction which guest kernels can use to interrupt other threads in the same core or sub-core. Since the instruction takes the destination physical thread ID as a parameter, it becomes necessary to align the physical thread IDs with the virtual thread IDs, that is, to make sure virtual thread N within a virtual core always runs on physical thread N. This means that it's possible that thread 0, which is where we call __kvmppc_vcore_entry, may end up running some other vcpu than the one whose task called kvmppc_run_core(), or it may end up running no vcpu at all, if for example thread 0 of the virtual core is currently executing in userspace. However, we do need thread 0 to be responsible for switching the MMU -- a previous version of this patch that had other threads switching the MMU was found to be responsible for occasional memory corruption and machine check interrupts in the guest on POWER7 machines. To accommodate this, we no longer pass the vcpu pointer to __kvmppc_vcore_entry, but instead let the assembly code load it from the PACA. Since the assembly code will need to know the kvm pointer and the thread ID for threads which don't have a vcpu, we move the thread ID into the PACA and we add a kvm pointer to the virtual core structure. In the case where thread 0 has no vcpu to run, it still calls into kvmppc_hv_entry in order to do the MMU switch, and then naps until either its vcpu is ready to run in the guest, or some other thread needs to exit the guest. In the latter case, thread 0 jumps to the code that switches the MMU back to the host. This control flow means that now we switch the MMU before loading any guest vcpu state. Similarly, on guest exit we now save all the guest vcpu state before switching the MMU back to the host. This has required substantial code movement, making the diff rather large. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_asm.h | 1 + arch/powerpc/include/asm/kvm_host.h | 2 + arch/powerpc/kernel/asm-offsets.c | 3 +- arch/powerpc/kvm/book3s_hv.c | 46 +- arch/powerpc/kvm/book3s_hv_interrupts.S | 6 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 676 +++++++++++++++++------------- 6 files changed, 397 insertions(+), 337 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 0bd9348a4db9..490b34f5d6bf 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -87,6 +87,7 @@ struct kvmppc_host_state { u8 hwthread_req; u8 hwthread_state; u8 host_ipi; + u8 ptid; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2c2ca5faf7f2..b850544dbc3f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -288,6 +288,7 @@ struct kvmppc_vcore { int n_woken; int nap_count; int napping_threads; + int first_vcpuid; u16 pcpu; u16 last_cpu; u8 vcore_state; @@ -298,6 +299,7 @@ struct kvmppc_vcore { u64 stolen_tb; u64 preempt_tb; struct kvm_vcpu *runner; + struct kvm *kvm; u64 tb_offset; /* guest timebase - host timebase */ ulong lpcr; u32 arch_compat; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5e64c3d2149f..332ae66883e4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -506,7 +506,6 @@ int main(void) DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); - DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); @@ -514,6 +513,7 @@ int main(void) DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); + DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); @@ -583,6 +583,7 @@ int main(void) HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); + HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7e1813ceabc1..7da53cd215db 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -990,6 +990,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, init_waitqueue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_core; + vcore->kvm = kvm; } kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; @@ -1003,6 +1005,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, ++vcore->num_threads; spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; + vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -1066,7 +1069,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) } } -extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +extern void __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) @@ -1140,15 +1143,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) tpaca = &paca[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; tpaca->kvm_hstate.kvm_vcore = vc; - tpaca->kvm_hstate.napping = 0; + tpaca->kvm_hstate.ptid = vcpu->arch.ptid; vcpu->cpu = vc->pcpu; smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) - if (vcpu->arch.ptid) { + if (cpu != smp_processor_id()) { #ifdef CONFIG_KVM_XICS xics_wake_cpu(cpu); #endif - ++vc->n_woken; + if (vcpu->arch.ptid) + ++vc->n_woken; } #endif } @@ -1205,10 +1209,10 @@ static int on_primary_thread(void) */ static void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vcpu0, *vnext; + struct kvm_vcpu *vcpu, *vnext; long ret; u64 now; - int ptid, i, need_vpa_update; + int i, need_vpa_update; int srcu_idx; struct kvm_vcpu *vcpus_to_update[threads_per_core]; @@ -1245,25 +1249,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) spin_lock(&vc->lock); } - /* - * Assign physical thread IDs, first to non-ceded vcpus - * and then to ceded ones. - */ - ptid = 0; - vcpu0 = NULL; - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - if (!vcpu->arch.ceded) { - if (!ptid) - vcpu0 = vcpu; - vcpu->arch.ptid = ptid++; - } - } - if (!vcpu0) - goto out; /* nothing to run; should never happen */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) - if (vcpu->arch.ceded) - vcpu->arch.ptid = ptid++; - /* * Make sure we are running on thread 0, and that * secondary threads are offline. @@ -1280,15 +1265,19 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_create_dtl_entry(vcpu, vc); } + /* Set this explicitly in case thread 0 doesn't have a vcpu */ + get_paca()->kvm_hstate.kvm_vcore = vc; + get_paca()->kvm_hstate.ptid = 0; + vc->vcore_state = VCORE_RUNNING; preempt_disable(); spin_unlock(&vc->lock); kvm_guest_enter(); - srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); + srcu_idx = srcu_read_lock(&vc->kvm->srcu); - __kvmppc_vcore_entry(NULL, vcpu0); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); /* disable sending of IPIs on virtual external irqs */ @@ -1303,7 +1292,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); - srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); + srcu_read_unlock(&vc->kvm->srcu, srcu_idx); /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); @@ -1411,7 +1400,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!signal_pending(current)) { if (vc->vcore_state == VCORE_RUNNING && VCORE_EXIT_COUNT(vc) == 0) { - vcpu->arch.ptid = vc->n_runnable - 1; kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 00b7ed41ea17..e873796b1a29 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -35,7 +35,7 @@ ****************************************************************************/ /* Registers: - * r4: vcpu pointer + * none */ _GLOBAL(__kvmppc_vcore_entry) @@ -71,7 +71,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtmsrd r10,1 /* Save host PMU registers */ - /* R4 is live here (vcpu pointer) but not r3 or r5 */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ @@ -136,16 +135,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * enters the guest with interrupts enabled. */ BEGIN_FTR_SECTION + ld r4, HSTATE_KVM_VCPU(r13) ld r0, VCPU_PENDING_EXC(r4) li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h and. r0, r0, r7 beq 32f - mr r31, r4 lhz r3, PACAPACAINDEX(r13) bl smp_send_reschedule nop - mr r4, r31 32: END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 66db71c9156a..8bbe91bdb6da 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -33,6 +33,10 @@ #error Need to fix lppaca and SLB shadow accesses in little endian mode #endif +/* Values in HSTATE_NAPPING(r13) */ +#define NAPPING_CEDE 1 +#define NAPPING_NOVCPU 2 + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -57,6 +61,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) RFI kvmppc_call_hv_entry: + ld r4, HSTATE_KVM_VCPU(r13) bl kvmppc_hv_entry /* Back from guest - restore host state and return to caller */ @@ -73,15 +78,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r3,PACA_SPRG3(r13) mtspr SPRN_SPRG3,r3 - /* - * Reload DEC. HDEC interrupts were disabled when - * we reloaded the host's LPCR value. - */ - ld r3, HSTATE_DECEXP(r13) - mftb r4 - subf r4, r4, r3 - mtspr SPRN_DEC, r4 - /* Reload the host's PMU registers */ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r4, LPPACA_PMCINUSE(r3) @@ -116,6 +112,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) isync 23: + /* + * Reload DEC. HDEC interrupts were disabled when + * we reloaded the host's LPCR value. + */ + ld r3, HSTATE_DECEXP(r13) + mftb r4 + subf r4, r4, r3 + mtspr SPRN_DEC, r4 + /* * For external and machine check interrupts, we need * to call the Linux handler to process the interrupt. @@ -156,15 +161,82 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 13: b machine_check_fwnmi +kvmppc_primary_no_guest: + /* We handle this much like a ceded vcpu */ + /* set our bit in napping_threads */ + ld r5, HSTATE_KVM_VCORE(r13) + lbz r7, HSTATE_PTID(r13) + li r0, 1 + sld r0, r0, r7 + addi r6, r5, VCORE_NAPPING_THREADS +1: lwarx r3, 0, r6 + or r3, r3, r0 + stwcx. r3, 0, r6 + bne 1b + /* order napping_threads update vs testing entry_exit_count */ + isync + li r12, 0 + lwz r7, VCORE_ENTRY_EXIT(r5) + cmpwi r7, 0x100 + bge kvm_novcpu_exit /* another thread already exiting */ + li r3, NAPPING_NOVCPU + stb r3, HSTATE_NAPPING(r13) + li r3, 1 + stb r3, HSTATE_HWTHREAD_REQ(r13) + + b kvm_do_nap + +kvm_novcpu_wakeup: + ld r1, HSTATE_HOST_R1(r13) + ld r5, HSTATE_KVM_VCORE(r13) + li r0, 0 + stb r0, HSTATE_NAPPING(r13) + stb r0, HSTATE_HWTHREAD_REQ(r13) + + /* see if any other thread is already exiting */ + li r12, 0 + lwz r0, VCORE_ENTRY_EXIT(r5) + cmpwi r0, 0x100 + bge kvm_novcpu_exit + + /* clear our bit in napping_threads */ + lbz r7, HSTATE_PTID(r13) + li r0, 1 + sld r0, r0, r7 + addi r6, r5, VCORE_NAPPING_THREADS +4: lwarx r3, 0, r6 + andc r3, r3, r0 + stwcx. r3, 0, r6 + bne 4b + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3, SPRN_SRR1 + rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ + cmpwi r3, 4 /* was it an external interrupt? */ + bne kvm_novcpu_exit /* if not, exit the guest */ + + /* extern interrupt - read and handle it */ + li r12, BOOK3S_INTERRUPT_EXTERNAL + bl kvmppc_read_intr + cmpdi r3, 0 + bge kvm_novcpu_exit + li r12, 0 + + /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ + ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r4, 0 + bne kvmppc_got_guest + +kvm_novcpu_exit: + b hdec_soon + /* - * We come in here when wakened from nap mode on a secondary hw thread. + * We come in here when wakened from nap mode. * Relocation is off and most register values are lost. * r13 points to the PACA. */ .globl kvm_start_guest kvm_start_guest: - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) li r0,KVM_HWTHREAD_IN_KVM @@ -176,8 +248,13 @@ kvm_start_guest: /* were we napping due to cede? */ lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + cmpwi r0,NAPPING_CEDE + beq kvm_end_cede + cmpwi r0,NAPPING_NOVCPU + beq kvm_novcpu_wakeup + + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD /* * We weren't napping due to cede, so this must be a secondary @@ -220,7 +297,13 @@ kvm_start_guest: stw r8,HSTATE_SAVED_XIRR(r13) b kvm_no_guest -30: bl kvmppc_hv_entry +30: + /* Set HSTATE_DSCR(r13) to something sensible */ + LOAD_REG_ADDR(r6, dscr_default) + ld r6, 0(r6) + std r6, HSTATE_DSCR(r13) + + bl kvmppc_hv_entry /* Back from the guest, go back to nap */ /* Clear our vcpu pointer so we don't come back in early */ @@ -252,6 +335,7 @@ kvm_start_guest: kvm_no_guest: li r0, KVM_HWTHREAD_IN_NAP stb r0, HSTATE_HWTHREAD_STATE(r13) +kvm_do_nap: li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 @@ -276,7 +360,7 @@ kvmppc_hv_entry: /* Required state: * - * R4 = vcpu pointer + * R4 = vcpu pointer (or NULL) * MSR = ~IR|DR * R13 = PACA * R1 = host R1 @@ -286,124 +370,12 @@ kvmppc_hv_entry: std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) -BEGIN_FTR_SECTION - /* Set partition DABR */ - /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ - li r5,3 - ld r6,VCPU_DABR(r4) - mtspr SPRN_DABRX,r5 - mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION_NESTED(89) - isync - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - - /* Load guest PMU registers */ - /* R4 is live here (vcpu pointer) */ - li r3, 1 - sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ - mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ - isync - lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ - lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ - lwz r6, VCPU_PMC + 8(r4) - lwz r7, VCPU_PMC + 12(r4) - lwz r8, VCPU_PMC + 16(r4) - lwz r9, VCPU_PMC + 20(r4) -BEGIN_FTR_SECTION - lwz r10, VCPU_PMC + 24(r4) - lwz r11, VCPU_PMC + 28(r4) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - mtspr SPRN_PMC1, r3 - mtspr SPRN_PMC2, r5 - mtspr SPRN_PMC3, r6 - mtspr SPRN_PMC4, r7 - mtspr SPRN_PMC5, r8 - mtspr SPRN_PMC6, r9 -BEGIN_FTR_SECTION - mtspr SPRN_PMC7, r10 - mtspr SPRN_PMC8, r11 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - ld r3, VCPU_MMCR(r4) - ld r5, VCPU_MMCR + 8(r4) - ld r6, VCPU_MMCR + 16(r4) - ld r7, VCPU_SIAR(r4) - ld r8, VCPU_SDAR(r4) - mtspr SPRN_MMCR1, r5 - mtspr SPRN_MMCRA, r6 - mtspr SPRN_SIAR, r7 - mtspr SPRN_SDAR, r8 - mtspr SPRN_MMCR0, r3 - isync - - /* Load up FP, VMX and VSX registers */ - bl kvmppc_load_fp - - ld r14, VCPU_GPR(R14)(r4) - ld r15, VCPU_GPR(R15)(r4) - ld r16, VCPU_GPR(R16)(r4) - ld r17, VCPU_GPR(R17)(r4) - ld r18, VCPU_GPR(R18)(r4) - ld r19, VCPU_GPR(R19)(r4) - ld r20, VCPU_GPR(R20)(r4) - ld r21, VCPU_GPR(R21)(r4) - ld r22, VCPU_GPR(R22)(r4) - ld r23, VCPU_GPR(R23)(r4) - ld r24, VCPU_GPR(R24)(r4) - ld r25, VCPU_GPR(R25)(r4) - ld r26, VCPU_GPR(R26)(r4) - ld r27, VCPU_GPR(R27)(r4) - ld r28, VCPU_GPR(R28)(r4) - ld r29, VCPU_GPR(R29)(r4) - ld r30, VCPU_GPR(R30)(r4) - ld r31, VCPU_GPR(R31)(r4) - -BEGIN_FTR_SECTION - /* Switch DSCR to guest value */ - ld r5, VCPU_DSCR(r4) - mtspr SPRN_DSCR, r5 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* - * Set the decrementer to the guest decrementer. - */ - ld r8,VCPU_DEC_EXPIRES(r4) - mftb r7 - subf r3,r7,r8 - mtspr SPRN_DEC,r3 - stw r3,VCPU_DEC(r4) - - ld r5, VCPU_SPRG0(r4) - ld r6, VCPU_SPRG1(r4) - ld r7, VCPU_SPRG2(r4) - ld r8, VCPU_SPRG3(r4) - mtspr SPRN_SPRG0, r5 - mtspr SPRN_SPRG1, r6 - mtspr SPRN_SPRG2, r7 - mtspr SPRN_SPRG3, r8 - /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) - /* Load up DAR and DSISR */ - ld r5, VCPU_DAR(r4) - lwz r6, VCPU_DSISR(r4) - mtspr SPRN_DAR, r5 - mtspr SPRN_DSISR, r6 - li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -BEGIN_FTR_SECTION - /* Restore AMR and UAMOR, set AMOR to all 1s */ - ld r5,VCPU_AMR(r4) - ld r6,VCPU_UAMOR(r4) - li r7,-1 - mtspr SPRN_AMR,r5 - mtspr SPRN_UAMOR,r6 - mtspr SPRN_AMOR,r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Clear out SLB */ li r6,0 slbmte r6,r6 @@ -429,8 +401,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) bne 21b /* Primary thread switches to guest partition. */ - ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ - lwz r6,VCPU_PTID(r4) + ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ + lbz r6,HSTATE_PTID(r13) cmpwi r6,0 bne 20f ld r6,KVM_SDR1(r9) @@ -504,32 +476,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_RMOR,r8 isync - /* Increment yield count if they have a VPA */ - ld r3, VCPU_VPA(r4) - cmpdi r3, 0 - beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) - addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) - li r6, 1 - stb r6, VCPU_VPA_DIRTY(r4) -25: /* Check if HDEC expires soon */ mfspr r3,SPRN_HDEC - cmpwi r3,10 + cmpwi r3,512 /* 1 microsecond */ li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - mr r9,r4 blt hdec_soon - - /* Save purr/spurr */ - mfspr r5,SPRN_PURR - mfspr r6,SPRN_SPURR - std r5,HSTATE_PURR(r13) - std r6,HSTATE_SPURR(r13) - ld r7,VCPU_PURR(r4) - ld r8,VCPU_SPURR(r4) - mtspr SPRN_PURR,r7 - mtspr SPRN_SPURR,r8 b 31f /* @@ -540,7 +491,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * We also have to invalidate the TLB since its * entries aren't tagged with the LPID. */ -30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ +30: ld r5,HSTATE_KVM_VCORE(r13) + ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ /* first take native_tlbie_lock */ .section ".toc","aw" @@ -605,7 +557,6 @@ toc_tlbie_lock: mfspr r3,SPRN_HDEC cmpwi r3,10 li r12,BOOK3S_INTERRUPT_HV_DECREMENTER - mr r9,r4 blt hdec_soon /* Enable HDEC interrupts */ @@ -620,9 +571,14 @@ toc_tlbie_lock: mfspr r0,SPRN_HID0 mfspr r0,SPRN_HID0 mfspr r0,SPRN_HID0 +31: + /* Do we have a guest vcpu to run? */ + cmpdi r4, 0 + beq kvmppc_primary_no_guest +kvmppc_got_guest: /* Load up guest SLB entries */ -31: lwz r5,VCPU_SLB_MAX(r4) + lwz r5,VCPU_SLB_MAX(r4) cmpwi r5,0 beq 9f mtctr r5 @@ -633,6 +589,140 @@ toc_tlbie_lock: addi r6,r6,VCPU_SLB_SIZE bdnz 1b 9: + /* Increment yield count if they have a VPA */ + ld r3, VCPU_VPA(r4) + cmpdi r3, 0 + beq 25f + lwz r5, LPPACA_YIELDCOUNT(r3) + addi r5, r5, 1 + stw r5, LPPACA_YIELDCOUNT(r3) + li r6, 1 + stb r6, VCPU_VPA_DIRTY(r4) +25: + +BEGIN_FTR_SECTION + /* Save purr/spurr */ + mfspr r5,SPRN_PURR + mfspr r6,SPRN_SPURR + std r5,HSTATE_PURR(r13) + std r6,HSTATE_SPURR(r13) + ld r7,VCPU_PURR(r4) + ld r8,VCPU_SPURR(r4) + mtspr SPRN_PURR,r7 + mtspr SPRN_SPURR,r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION + /* Set partition DABR */ + /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 + BEGIN_FTR_SECTION_NESTED(89) + isync + END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + + /* Load guest PMU registers */ + /* R4 is live here (vcpu pointer) */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + isync + lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ + lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ + lwz r6, VCPU_PMC + 8(r4) + lwz r7, VCPU_PMC + 12(r4) + lwz r8, VCPU_PMC + 16(r4) + lwz r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION + lwz r10, VCPU_PMC + 24(r4) + lwz r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_PMC1, r3 + mtspr SPRN_PMC2, r5 + mtspr SPRN_PMC3, r6 + mtspr SPRN_PMC4, r7 + mtspr SPRN_PMC5, r8 + mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + ld r3, VCPU_MMCR(r4) + ld r5, VCPU_MMCR + 8(r4) + ld r6, VCPU_MMCR + 16(r4) + ld r7, VCPU_SIAR(r4) + ld r8, VCPU_SDAR(r4) + mtspr SPRN_MMCR1, r5 + mtspr SPRN_MMCRA, r6 + mtspr SPRN_SIAR, r7 + mtspr SPRN_SDAR, r8 + mtspr SPRN_MMCR0, r3 + isync + + /* Load up FP, VMX and VSX registers */ + bl kvmppc_load_fp + + ld r14, VCPU_GPR(R14)(r4) + ld r15, VCPU_GPR(R15)(r4) + ld r16, VCPU_GPR(R16)(r4) + ld r17, VCPU_GPR(R17)(r4) + ld r18, VCPU_GPR(R18)(r4) + ld r19, VCPU_GPR(R19)(r4) + ld r20, VCPU_GPR(R20)(r4) + ld r21, VCPU_GPR(R21)(r4) + ld r22, VCPU_GPR(R22)(r4) + ld r23, VCPU_GPR(R23)(r4) + ld r24, VCPU_GPR(R24)(r4) + ld r25, VCPU_GPR(R25)(r4) + ld r26, VCPU_GPR(R26)(r4) + ld r27, VCPU_GPR(R27)(r4) + ld r28, VCPU_GPR(R28)(r4) + ld r29, VCPU_GPR(R29)(r4) + ld r30, VCPU_GPR(R30)(r4) + ld r31, VCPU_GPR(R31)(r4) + +BEGIN_FTR_SECTION + /* Switch DSCR to guest value */ + ld r5, VCPU_DSCR(r4) + mtspr SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + stw r3,VCPU_DEC(r4) + + ld r5, VCPU_SPRG0(r4) + ld r6, VCPU_SPRG1(r4) + ld r7, VCPU_SPRG2(r4) + ld r8, VCPU_SPRG3(r4) + mtspr SPRN_SPRG0, r5 + mtspr SPRN_SPRG1, r6 + mtspr SPRN_SPRG2, r7 + mtspr SPRN_SPRG3, r8 + + /* Load up DAR and DSISR */ + ld r5, VCPU_DAR(r4) + lwz r6, VCPU_DSISR(r4) + mtspr SPRN_DAR, r5 + mtspr SPRN_DSISR, r6 + +BEGIN_FTR_SECTION + /* Restore AMR and UAMOR, set AMOR to all 1s */ + ld r5,VCPU_AMR(r4) + ld r6,VCPU_UAMOR(r4) + li r7,-1 + mtspr SPRN_AMR,r5 + mtspr SPRN_UAMOR,r6 + mtspr SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore state of CTRL run bit; assume 1 on entry */ lwz r5,VCPU_CTRL(r4) @@ -984,13 +1074,130 @@ BEGIN_FTR_SECTION mtspr SPRN_SPURR,r4 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + + /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Switch DSCR back to host value */ +BEGIN_FTR_SECTION + mfspr r8, SPRN_DSCR + ld r7, HSTATE_DSCR(r13) + std r8, VCPU_DSCR(r9) + mtspr SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + + /* Save non-volatile GPRs */ + std r14, VCPU_GPR(R14)(r9) + std r15, VCPU_GPR(R15)(r9) + std r16, VCPU_GPR(R16)(r9) + std r17, VCPU_GPR(R17)(r9) + std r18, VCPU_GPR(R18)(r9) + std r19, VCPU_GPR(R19)(r9) + std r20, VCPU_GPR(R20)(r9) + std r21, VCPU_GPR(R21)(r9) + std r22, VCPU_GPR(R22)(r9) + std r23, VCPU_GPR(R23)(r9) + std r24, VCPU_GPR(R24)(r9) + std r25, VCPU_GPR(R25)(r9) + std r26, VCPU_GPR(R26)(r9) + std r27, VCPU_GPR(R27)(r9) + std r28, VCPU_GPR(R28)(r9) + std r29, VCPU_GPR(R29)(r9) + std r30, VCPU_GPR(R30)(r9) + std r31, VCPU_GPR(R31)(r9) + + /* Save SPRGs */ + mfspr r3, SPRN_SPRG0 + mfspr r4, SPRN_SPRG1 + mfspr r5, SPRN_SPRG2 + mfspr r6, SPRN_SPRG3 + std r3, VCPU_SPRG0(r9) + std r4, VCPU_SPRG1(r9) + std r5, VCPU_SPRG2(r9) + std r6, VCPU_SPRG3(r9) + + /* save FP state */ + mr r3, r9 + bl kvmppc_save_fp + + /* Increment yield count if they have a VPA */ + ld r8, VCPU_VPA(r9) /* do they have a VPA? */ + cmpdi r8, 0 + beq 25f + lwz r3, LPPACA_YIELDCOUNT(r8) + addi r3, r3, 1 + stw r3, LPPACA_YIELDCOUNT(r8) + li r3, 1 + stb r3, VCPU_VPA_DIRTY(r9) +25: + /* Save PMU registers if requested */ + /* r8 and cr0.eq are live here */ + li r3, 1 + sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ + mfspr r4, SPRN_MMCR0 /* save MMCR0 */ + mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r7, 0 + mtspr SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + isync + beq 21f /* if no VPA, save PMU stuff anyway */ + lbz r7, LPPACA_PMCINUSE(r8) + cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ + bne 21f + std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ + b 22f +21: mfspr r5, SPRN_MMCR1 + mfspr r7, SPRN_SIAR + mfspr r8, SPRN_SDAR + std r4, VCPU_MMCR(r9) + std r5, VCPU_MMCR + 8(r9) + std r6, VCPU_MMCR + 16(r9) + std r7, VCPU_SIAR(r9) + std r8, VCPU_SDAR(r9) + mfspr r3, SPRN_PMC1 + mfspr r4, SPRN_PMC2 + mfspr r5, SPRN_PMC3 + mfspr r6, SPRN_PMC4 + mfspr r7, SPRN_PMC5 + mfspr r8, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + stw r3, VCPU_PMC(r9) + stw r4, VCPU_PMC + 4(r9) + stw r5, VCPU_PMC + 8(r9) + stw r6, VCPU_PMC + 12(r9) + stw r7, VCPU_PMC + 16(r9) + stw r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION + stw r10, VCPU_PMC + 24(r9) + stw r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +22: /* Clear out SLB */ li r5,0 slbmte r5,r5 slbia ptesync -hdec_soon: /* r9 = vcpu, r12 = trap, r13 = paca */ +hdec_soon: /* r12 = trap, r13 = paca */ BEGIN_FTR_SECTION b 32f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) @@ -1024,8 +1231,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) */ cmpwi r3,0x100 /* Are we the first here? */ bge 43f - cmpwi r3,1 /* Are any other threads in the guest? */ - ble 43f cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER beq 40f li r0,0 @@ -1036,7 +1241,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * doesn't wake CPUs up from nap. */ lwz r3,VCORE_NAPPING_THREADS(r5) - lwz r4,VCPU_PTID(r9) + lbz r4,HSTATE_PTID(r13) li r0,1 sld r0,r0,r4 andc. r3,r3,r0 /* no sense IPI'ing ourselves */ @@ -1053,10 +1258,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) addi r6,r6,PACA_SIZE bne 42b +secondary_too_late: /* Secondary threads wait for primary to do partition switch */ -43: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ - ld r5,HSTATE_KVM_VCORE(r13) - lwz r3,VCPU_PTID(r9) +43: ld r5,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ + lbz r3,HSTATE_PTID(r13) cmpwi r3,0 beq 15f HMT_LOW @@ -1121,7 +1327,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) * We have to lock against concurrent tlbies, and * we have to flush the whole TLB. */ -32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ +32: ld r5,HSTATE_KVM_VCORE(r13) + ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ /* Take the guest's tlbie_lock */ #ifdef __BIG_ENDIAN__ @@ -1204,151 +1411,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 1: addi r8,r8,16 .endr - /* Save DEC */ - mfspr r5,SPRN_DEC - mftb r6 - extsw r5,r5 - add r5,r5,r6 - std r5,VCPU_DEC_EXPIRES(r9) - - /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION - mfspr r5,SPRN_AMR - mfspr r6,SPRN_UAMOR - std r5,VCPU_AMR(r9) - std r6,VCPU_UAMOR(r9) - li r6,0 - mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Unset guest mode */ li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - /* Switch DSCR back to host value */ -BEGIN_FTR_SECTION - mfspr r8, SPRN_DSCR - ld r7, HSTATE_DSCR(r13) - std r8, VCPU_DSCR(r9) - mtspr SPRN_DSCR, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - - /* Save non-volatile GPRs */ - std r14, VCPU_GPR(R14)(r9) - std r15, VCPU_GPR(R15)(r9) - std r16, VCPU_GPR(R16)(r9) - std r17, VCPU_GPR(R17)(r9) - std r18, VCPU_GPR(R18)(r9) - std r19, VCPU_GPR(R19)(r9) - std r20, VCPU_GPR(R20)(r9) - std r21, VCPU_GPR(R21)(r9) - std r22, VCPU_GPR(R22)(r9) - std r23, VCPU_GPR(R23)(r9) - std r24, VCPU_GPR(R24)(r9) - std r25, VCPU_GPR(R25)(r9) - std r26, VCPU_GPR(R26)(r9) - std r27, VCPU_GPR(R27)(r9) - std r28, VCPU_GPR(R28)(r9) - std r29, VCPU_GPR(R29)(r9) - std r30, VCPU_GPR(R30)(r9) - std r31, VCPU_GPR(R31)(r9) - - /* Save SPRGs */ - mfspr r3, SPRN_SPRG0 - mfspr r4, SPRN_SPRG1 - mfspr r5, SPRN_SPRG2 - mfspr r6, SPRN_SPRG3 - std r3, VCPU_SPRG0(r9) - std r4, VCPU_SPRG1(r9) - std r5, VCPU_SPRG2(r9) - std r6, VCPU_SPRG3(r9) - - /* save FP state */ - mr r3, r9 - bl kvmppc_save_fp - - /* Increment yield count if they have a VPA */ - ld r8, VCPU_VPA(r9) /* do they have a VPA? */ - cmpdi r8, 0 - beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) - addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) - li r3, 1 - stb r3, VCPU_VPA_DIRTY(r9) -25: - /* Save PMU registers if requested */ - /* r8 and cr0.eq are live here */ - li r3, 1 - sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ - mfspr r4, SPRN_MMCR0 /* save MMCR0 */ - mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ - mfspr r6, SPRN_MMCRA -BEGIN_FTR_SECTION - /* On P7, clear MMCRA in order to disable SDAR updates */ - li r7, 0 - mtspr SPRN_MMCRA, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - isync - beq 21f /* if no VPA, save PMU stuff anyway */ - lbz r7, LPPACA_PMCINUSE(r8) - cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ - bne 21f - std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ - b 22f -21: mfspr r5, SPRN_MMCR1 - mfspr r7, SPRN_SIAR - mfspr r8, SPRN_SDAR - std r4, VCPU_MMCR(r9) - std r5, VCPU_MMCR + 8(r9) - std r6, VCPU_MMCR + 16(r9) - std r7, VCPU_SIAR(r9) - std r8, VCPU_SDAR(r9) - mfspr r3, SPRN_PMC1 - mfspr r4, SPRN_PMC2 - mfspr r5, SPRN_PMC3 - mfspr r6, SPRN_PMC4 - mfspr r7, SPRN_PMC5 - mfspr r8, SPRN_PMC6 -BEGIN_FTR_SECTION - mfspr r10, SPRN_PMC7 - mfspr r11, SPRN_PMC8 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - stw r3, VCPU_PMC(r9) - stw r4, VCPU_PMC + 4(r9) - stw r5, VCPU_PMC + 8(r9) - stw r6, VCPU_PMC + 12(r9) - stw r7, VCPU_PMC + 16(r9) - stw r8, VCPU_PMC + 20(r9) -BEGIN_FTR_SECTION - stw r10, VCPU_PMC + 24(r9) - stw r11, VCPU_PMC + 28(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) -22: ld r0, 112+PPC_LR_STKOFF(r1) addi r1, r1, 112 mtlr r0 blr -secondary_too_late: - ld r5,HSTATE_KVM_VCORE(r13) - HMT_LOW -13: lbz r3,VCORE_IN_GUEST(r5) - cmpwi r3,0 - bne 13b - HMT_MEDIUM - li r0, KVM_GUEST_MODE_NONE - stb r0, HSTATE_IN_GUEST(r13) - ld r11,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r11) - ld r6,SLBSHADOW_SAVEAREA+8(r11) - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r11,r11,16 - .endr - b 22b /* * Check whether an HDSI is an HPTE not found fault or something else. @@ -1649,7 +1719,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * up to the host. */ ld r5,HSTATE_KVM_VCORE(r13) - lwz r6,VCPU_PTID(r3) + lbz r6,HSTATE_PTID(r13) lwz r8,VCORE_ENTRY_EXIT(r5) clrldi r8,r8,56 li r0,1 @@ -1662,7 +1732,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) bge kvm_cede_exit stwcx. r4,0,r6 bne 31b - li r0,1 + li r0,NAPPING_CEDE stb r0,HSTATE_NAPPING(r13) /* order napping_threads update vs testing entry_exit_count */ lwsync @@ -1751,7 +1821,7 @@ kvm_end_cede: /* clear our bit in vcore->napping_threads */ 33: ld r5,HSTATE_KVM_VCORE(r13) - lwz r3,VCPU_PTID(r4) + lbz r3,HSTATE_PTID(r13) li r0,1 sld r0,r0,r3 addi r6,r5,VCORE_NAPPING_THREADS -- cgit v1.2.3 From b005255e12a311d2c87ea70a7c7b192b2187c22c Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 8 Jan 2014 21:25:21 +1100 Subject: KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs This adds fields to the struct kvm_vcpu_arch to store the new guest-accessible SPRs on POWER8, adds code to the get/set_one_reg functions to allow userspace to access this state, and adds code to the guest entry and exit to context-switch these SPRs between host and guest. Note that DPDES (Directed Privileged Doorbell Exception State) is shared between threads on a core; hence we store it in struct kvmppc_vcore and have the master thread save and restore it. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 25 +++++- arch/powerpc/include/asm/reg.h | 17 ++++ arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 23 +++++ arch/powerpc/kvm/book3s_hv.c | 153 +++++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 145 ++++++++++++++++++++++++++++++ 6 files changed, 361 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b850544dbc3f..81c92d1d7978 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -304,6 +304,7 @@ struct kvmppc_vcore { ulong lpcr; u32 arch_compat; ulong pcr; + ulong dpdes; /* doorbell state (POWER8) */ }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -448,6 +449,7 @@ struct kvm_vcpu_arch { ulong pc; ulong ctr; ulong lr; + ulong tar; ulong xer; u32 cr; @@ -457,13 +459,32 @@ struct kvm_vcpu_arch { ulong guest_owned_ext; ulong purr; ulong spurr; + ulong ic; + ulong vtb; ulong dscr; ulong amr; ulong uamor; + ulong iamr; u32 ctrl; ulong dabr; + ulong dawr; + ulong dawrx; + ulong ciabr; ulong cfar; ulong ppr; + ulong pspb; + ulong fscr; + ulong tfhar; + ulong tfiar; + ulong texasr; + ulong ebbhr; + ulong ebbrr; + ulong bescr; + ulong csigr; + ulong tacr; + ulong tcscr; + ulong acop; + ulong wort; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ @@ -498,10 +519,12 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbsr; - u64 mmcr[3]; + u64 mmcr[5]; u32 pmc[8]; + u32 spmc[2]; u64 siar; u64 sdar; + u64 sier; #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5c45787d551e..2f41e6475648 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -223,6 +223,11 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_CIABR 0xBB +#define CIABR_PRIV 0x3 +#define CIABR_PRIV_USER 1 +#define CIABR_PRIV_SUPER 2 +#define CIABR_PRIV_HYPER 3 #define SPRN_DAWRX 0xBC #define DAWRX_USER (1UL << 0) #define DAWRX_KERNEL (1UL << 1) @@ -260,6 +265,8 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ +#define SPRN_IC 0x350 /* Virtual Instruction Count */ +#define SPRN_VTB 0x351 /* Virtual Time Base */ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ @@ -368,6 +375,8 @@ #define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ #define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ #define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_DHDES 0x0B1 /* Directed Hyp. Doorbell Exc. State */ +#define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */ #define SPRN_EAR 0x11A /* External Address Register */ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ #define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ @@ -427,6 +436,7 @@ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ +#define SPRN_IAMR 0x03D /* Instr. Authority Mask Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ #define HID4_LPES0 (1ul << (63-0)) /* LPAR env. sel. bit 0 */ #define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ @@ -541,6 +551,7 @@ #define SPRN_PIR 0x3FF /* Processor Identification Register */ #endif #define SPRN_TIR 0x1BE /* Thread Identification Register */ +#define SPRN_PSPB 0x09F /* Problem State Priority Boost reg */ #define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ #define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ #define SPRN_PURR 0x135 /* Processor Utilization of Resources Reg */ @@ -682,6 +693,7 @@ #define SPRN_EBBHR 804 /* Event based branch handler register */ #define SPRN_EBBRR 805 /* Event based branch return register */ #define SPRN_BESCR 806 /* Branch event status and control register */ +#define SPRN_WORT 895 /* Workload optimization register - thread */ #define SPRN_PMC1 787 #define SPRN_PMC2 788 @@ -698,6 +710,11 @@ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_TACR 888 +#define SPRN_TCSCR 889 +#define SPRN_CSIGR 890 +#define SPRN_SPMC1 892 +#define SPRN_SPMC2 893 /* When EBB is enabled, some of MMCR0/MMCR2/SIER are user accessible */ #define MMCR0_USER_MASK (MMCR0_FC | MMCR0_PMXE | MMCR0_PMAO) diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 6836ec79a830..a586fb9b77bd 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -545,6 +545,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1) #define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2) #define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3) +#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4) #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 332ae66883e4..043900abbbb0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -432,6 +432,7 @@ int main(void) DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -484,11 +485,17 @@ int main(void) DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); + DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); + DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); + DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); @@ -497,8 +504,10 @@ int main(void) DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); + DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); @@ -508,6 +517,19 @@ int main(void) DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); + DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); + DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); + DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); + DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); + DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); + DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); + DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); + DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); + DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); + DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); + DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); + DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); + DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); @@ -517,6 +539,7 @@ int main(void) DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); + DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7da53cd215db..5b08ddf91d2d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -800,7 +800,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: *val = get_reg_val(id, vcpu->arch.uamor); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: i = id - KVM_REG_PPC_MMCR0; *val = get_reg_val(id, vcpu->arch.mmcr[i]); break; @@ -808,12 +808,85 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; + case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: + i = id - KVM_REG_PPC_SPMC1; + *val = get_reg_val(id, vcpu->arch.spmc[i]); + break; case KVM_REG_PPC_SIAR: *val = get_reg_val(id, vcpu->arch.siar); break; case KVM_REG_PPC_SDAR: *val = get_reg_val(id, vcpu->arch.sdar); break; + case KVM_REG_PPC_SIER: + *val = get_reg_val(id, vcpu->arch.sier); + break; + case KVM_REG_PPC_IAMR: + *val = get_reg_val(id, vcpu->arch.iamr); + break; + case KVM_REG_PPC_TFHAR: + *val = get_reg_val(id, vcpu->arch.tfhar); + break; + case KVM_REG_PPC_TFIAR: + *val = get_reg_val(id, vcpu->arch.tfiar); + break; + case KVM_REG_PPC_TEXASR: + *val = get_reg_val(id, vcpu->arch.texasr); + break; + case KVM_REG_PPC_FSCR: + *val = get_reg_val(id, vcpu->arch.fscr); + break; + case KVM_REG_PPC_PSPB: + *val = get_reg_val(id, vcpu->arch.pspb); + break; + case KVM_REG_PPC_EBBHR: + *val = get_reg_val(id, vcpu->arch.ebbhr); + break; + case KVM_REG_PPC_EBBRR: + *val = get_reg_val(id, vcpu->arch.ebbrr); + break; + case KVM_REG_PPC_BESCR: + *val = get_reg_val(id, vcpu->arch.bescr); + break; + case KVM_REG_PPC_TAR: + *val = get_reg_val(id, vcpu->arch.tar); + break; + case KVM_REG_PPC_DPDES: + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); + break; + case KVM_REG_PPC_DAWR: + *val = get_reg_val(id, vcpu->arch.dawr); + break; + case KVM_REG_PPC_DAWRX: + *val = get_reg_val(id, vcpu->arch.dawrx); + break; + case KVM_REG_PPC_CIABR: + *val = get_reg_val(id, vcpu->arch.ciabr); + break; + case KVM_REG_PPC_IC: + *val = get_reg_val(id, vcpu->arch.ic); + break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vtb); + break; + case KVM_REG_PPC_CSIGR: + *val = get_reg_val(id, vcpu->arch.csigr); + break; + case KVM_REG_PPC_TACR: + *val = get_reg_val(id, vcpu->arch.tacr); + break; + case KVM_REG_PPC_TCSCR: + *val = get_reg_val(id, vcpu->arch.tcscr); + break; + case KVM_REG_PPC_PID: + *val = get_reg_val(id, vcpu->arch.pid); + break; + case KVM_REG_PPC_ACOP: + *val = get_reg_val(id, vcpu->arch.acop); + break; + case KVM_REG_PPC_WORT: + *val = get_reg_val(id, vcpu->arch.wort); + break; case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -882,7 +955,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: vcpu->arch.uamor = set_reg_val(id, *val); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: i = id - KVM_REG_PPC_MMCR0; vcpu->arch.mmcr[i] = set_reg_val(id, *val); break; @@ -890,12 +963,88 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: + i = id - KVM_REG_PPC_SPMC1; + vcpu->arch.spmc[i] = set_reg_val(id, *val); + break; case KVM_REG_PPC_SIAR: vcpu->arch.siar = set_reg_val(id, *val); break; case KVM_REG_PPC_SDAR: vcpu->arch.sdar = set_reg_val(id, *val); break; + case KVM_REG_PPC_SIER: + vcpu->arch.sier = set_reg_val(id, *val); + break; + case KVM_REG_PPC_IAMR: + vcpu->arch.iamr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFHAR: + vcpu->arch.tfhar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TFIAR: + vcpu->arch.tfiar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TEXASR: + vcpu->arch.texasr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_FSCR: + vcpu->arch.fscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PSPB: + vcpu->arch.pspb = set_reg_val(id, *val); + break; + case KVM_REG_PPC_EBBHR: + vcpu->arch.ebbhr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_EBBRR: + vcpu->arch.ebbrr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_BESCR: + vcpu->arch.bescr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TAR: + vcpu->arch.tar = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DPDES: + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWR: + vcpu->arch.dawr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWRX: + vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; + break; + case KVM_REG_PPC_CIABR: + vcpu->arch.ciabr = set_reg_val(id, *val); + /* Don't allow setting breakpoints in hypervisor code */ + if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) + vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ + break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(id, *val); + break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(id, *val); + break; + case KVM_REG_PPC_CSIGR: + vcpu->arch.csigr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TACR: + vcpu->arch.tacr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_TCSCR: + vcpu->arch.tcscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PID: + vcpu->arch.pid = set_reg_val(id, *val); + break; + case KVM_REG_PPC_ACOP: + vcpu->arch.acop = set_reg_val(id, *val); + break; + case KVM_REG_PPC_WORT: + vcpu->arch.wort = set_reg_val(id, *val); + break; case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8bbe91bdb6da..691dd1ef555b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -460,6 +460,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) beq 38f mtspr SPRN_PCR, r7 38: + +BEGIN_FTR_SECTION + /* DPDES is shared between threads */ + ld r8, VCORE_DPDES(r5) + mtspr SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + li r0,1 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ b 10f @@ -659,6 +666,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_MMCRA, r6 mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 +BEGIN_FTR_SECTION + ld r5, VCPU_MMCR + 24(r4) + ld r6, VCPU_SIER(r4) + lwz r7, VCPU_PMC + 24(r4) + lwz r8, VCPU_PMC + 28(r4) + ld r9, VCPU_MMCR + 32(r4) + mtspr SPRN_MMCR2, r5 + mtspr SPRN_SIER, r6 + mtspr SPRN_SPMC1, r7 + mtspr SPRN_SPMC2, r8 + mtspr SPRN_MMCRS, r9 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -690,6 +709,61 @@ BEGIN_FTR_SECTION mtspr SPRN_DSCR, r5 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) +BEGIN_FTR_SECTION + /* Skip next section on POWER7 or PPC970 */ + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Load up POWER8-specific registers */ + ld r5, VCPU_IAMR(r4) + lwz r6, VCPU_PSPB(r4) + ld r7, VCPU_FSCR(r4) + mtspr SPRN_IAMR, r5 + mtspr SPRN_PSPB, r6 + mtspr SPRN_FSCR, r7 + ld r5, VCPU_DAWR(r4) + ld r6, VCPU_DAWRX(r4) + ld r7, VCPU_CIABR(r4) + ld r8, VCPU_TAR(r4) + mtspr SPRN_DAWR, r5 + mtspr SPRN_DAWRX, r6 + mtspr SPRN_CIABR, r7 + mtspr SPRN_TAR, r8 + ld r5, VCPU_IC(r4) + ld r6, VCPU_VTB(r4) + mtspr SPRN_IC, r5 + mtspr SPRN_VTB, r6 + ld r5, VCPU_TFHAR(r4) + ld r6, VCPU_TFIAR(r4) + ld r7, VCPU_TEXASR(r4) + ld r8, VCPU_EBBHR(r4) + mtspr SPRN_TFHAR, r5 + mtspr SPRN_TFIAR, r6 + mtspr SPRN_TEXASR, r7 + mtspr SPRN_EBBHR, r8 + ld r5, VCPU_EBBRR(r4) + ld r6, VCPU_BESCR(r4) + ld r7, VCPU_CSIGR(r4) + ld r8, VCPU_TACR(r4) + mtspr SPRN_EBBRR, r5 + mtspr SPRN_BESCR, r6 + mtspr SPRN_CSIGR, r7 + mtspr SPRN_TACR, r8 + ld r5, VCPU_TCSCR(r4) + ld r6, VCPU_ACOP(r4) + lwz r7, VCPU_GUEST_PID(r4) + ld r8, VCPU_WORT(r4) + mtspr SPRN_TCSCR, r5 + mtspr SPRN_ACOP, r6 + mtspr SPRN_PID, r7 + mtspr SPRN_WORT, r8 +8: + /* * Set the decrementer to the guest decrementer. */ @@ -1081,6 +1155,54 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) add r5,r5,r6 std r5,VCPU_DEC_EXPIRES(r9) +BEGIN_FTR_SECTION + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Save POWER8-specific registers */ + mfspr r5, SPRN_IAMR + mfspr r6, SPRN_PSPB + mfspr r7, SPRN_FSCR + std r5, VCPU_IAMR(r9) + stw r6, VCPU_PSPB(r9) + std r7, VCPU_FSCR(r9) + mfspr r5, SPRN_IC + mfspr r6, SPRN_VTB + mfspr r7, SPRN_TAR + std r5, VCPU_IC(r9) + std r6, VCPU_VTB(r9) + std r7, VCPU_TAR(r9) + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + mfspr r8, SPRN_EBBHR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) + std r8, VCPU_EBBHR(r9) + mfspr r5, SPRN_EBBRR + mfspr r6, SPRN_BESCR + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR + std r5, VCPU_EBBRR(r9) + std r6, VCPU_BESCR(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) + mfspr r5, SPRN_TCSCR + mfspr r6, SPRN_ACOP + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT + std r5, VCPU_TCSCR(r9) + std r6, VCPU_ACOP(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +8: + /* Save and reset AMR and UAMOR before turning on the MMU */ BEGIN_FTR_SECTION mfspr r5,SPRN_AMR @@ -1190,6 +1312,20 @@ BEGIN_FTR_SECTION stw r10, VCPU_PMC + 24(r9) stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) +BEGIN_FTR_SECTION + mfspr r4, SPRN_MMCR2 + mfspr r5, SPRN_SIER + mfspr r6, SPRN_SPMC1 + mfspr r7, SPRN_SPMC2 + mfspr r8, SPRN_MMCRS + std r4, VCPU_MMCR + 24(r9) + std r5, VCPU_SIER(r9) + stw r6, VCPU_PMC + 24(r9) + stw r7, VCPU_PMC + 28(r9) + std r8, VCPU_MMCR + 32(r9) + lis r4, 0x8000 + mtspr SPRN_MMCRS, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: /* Clear out SLB */ li r5,0 @@ -1290,6 +1426,15 @@ secondary_too_late: mtspr SPRN_LPID,r7 isync +BEGIN_FTR_SECTION + /* DPDES is shared between threads */ + mfspr r7, SPRN_DPDES + std r7, VCORE_DPDES(r5) + /* clear DPDES so we don't get guest doorbells in the host */ + li r8, 0 + mtspr SPRN_DPDES, r8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* Subtract timebase offset from timebase */ ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 -- cgit v1.2.3 From bd3048b80caace9cf0ae9ad22b2fbb8333b44a97 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 8 Jan 2014 21:25:23 +1100 Subject: KVM: PPC: Book3S HV: Add handler for HV facility unavailable At present this should never happen, since the host kernel sets HFSCR to allow access to all facilities. It's better to be prepared to handle it cleanly if it does ever happen, though. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/kvm/book3s_hv.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 1bd92fd43cfb..dba8fb244100 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -99,6 +99,7 @@ #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 +#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 #define BOOK3S_IRQPRIO_SYSTEM_RESET 0 #define BOOK3S_IRQPRIO_DATA_SEGMENT 1 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5b08ddf91d2d..1bf681e8a05f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -706,7 +706,16 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * we don't emulate any guest instructions at this stage. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, 0x80000); + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + break; + /* + * This occurs if the guest (kernel or userspace), does something that + * is prohibited by HFSCR. We just generate a program interrupt to + * the guest. + */ + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; break; default: -- cgit v1.2.3 From 5557ae0ec77c2b4b5bbce2883c0603054ab66e68 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:24 +1100 Subject: KVM: PPC: Book3S HV: Implement architecture compatibility modes for POWER8 This allows us to select architecture 2.05 (POWER6) or 2.06 (POWER7) compatibility modes on a POWER8 processor. (Note that transactional memory is disabled for usermode if either or both of the PCR_TM_DIS and PCR_ARCH_206 bits are set.) Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kvm/book3s_hv.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2f41e6475648..5a9983147683 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -329,6 +329,8 @@ #define SPRN_PCR 0x152 /* Processor compatibility register */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ +#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ +#define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ #define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1bf681e8a05f..1e9f4b45432b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -184,14 +184,28 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) switch (arch_compat) { case PVR_ARCH_205: - pcr = PCR_ARCH_205; + /* + * If an arch bit is set in PCR, all the defined + * higher-order arch bits also have to be set. + */ + pcr = PCR_ARCH_206 | PCR_ARCH_205; break; case PVR_ARCH_206: case PVR_ARCH_206p: + pcr = PCR_ARCH_206; + break; + case PVR_ARCH_207: break; default: return -EINVAL; } + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { + /* POWER7 can't emulate POWER8 */ + if (!(pcr & PCR_ARCH_206)) + return -EINVAL; + pcr &= ~PCR_ARCH_206; + } } spin_lock(&vc->lock); -- cgit v1.2.3 From aa31e843225769735b79795c955426c9479046a5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:26 +1100 Subject: KVM: PPC: Book3S HV: Handle guest using doorbells for IPIs * SRR1 wake reason field for system reset interrupt on wakeup from nap is now a 4-bit field on P8, compared to 3 bits on P7. * Set PECEDP in LPCR when napping because of H_CEDE so guest doorbells will wake us up. * Waking up from nap because of a guest doorbell interrupt is not a reason to exit the guest. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 4 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 19 +++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5a9983147683..1248b40107ea 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -307,7 +307,9 @@ #define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ #define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */ #define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */ -#define LPCR_PECE 0x00007000 /* powersave exit cause enable */ +#define LPCR_PECE 0x0001f000 /* powersave exit cause enable */ +#define LPCR_PECEDP 0x00010000 /* directed priv dbells cause exit */ +#define LPCR_PECEDH 0x00008000 /* directed hyp dbells cause exit */ #define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */ #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 386e141dbf16..9e89c7577b4a 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1857,13 +1857,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) bl kvmppc_save_fp /* - * Take a nap until a decrementer or external interrupt occurs, - * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR + * Take a nap until a decrementer or external or doobell interrupt + * occurs, with PECE1, PECE0 and PECEDP set in LPCR */ li r0,1 stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 +BEGIN_FTR_SECTION + oris r5,r5,LPCR_PECEDP@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_LPCR,r5 isync li r0, 0 @@ -1979,14 +1982,22 @@ machine_check_realmode: */ kvmppc_check_wake_reason: mfspr r6, SPRN_SRR1 - rlwinm r6, r6, 44-31, 0x7 /* extract wake reason field */ - cmpwi r6, 4 /* was it an external interrupt? */ +BEGIN_FTR_SECTION + rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */ +FTR_SECTION_ELSE + rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */ +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) + cmpwi r6, 8 /* was it an external interrupt? */ li r12, BOOK3S_INTERRUPT_EXTERNAL beq kvmppc_read_intr /* if so, see what it was */ li r3, 0 li r12, 0 cmpwi r6, 6 /* was it the decrementer? */ beq 0f +BEGIN_FTR_SECTION + cmpwi r6, 5 /* privileged doorbell? */ + beq 0f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, 1 /* anything else, return 1 */ 0: blr -- cgit v1.2.3 From e0622bd9f2fccc8a801fa7aaf4fa6d7c728c3a78 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:27 +1100 Subject: KVM: PPC: Book3S HV: Handle new LPCR bits on POWER8 POWER8 has a bit in the LPCR to enable or disable the PURR and SPURR registers to count when in the guest. Set this bit. POWER8 has a field in the LPCR called AIL (Alternate Interrupt Location) which is used to enable relocation-on interrupts. Allow userspace to set this field. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kvm/book3s_hv.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1248b40107ea..05ecb072540c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -305,8 +305,10 @@ #define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ #define LPCR_RMLS_SH (63-37) #define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ +#define LPCR_AIL 0x01800000 /* Alternate interrupt location */ #define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */ #define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */ +#define LPCR_ONL 0x00040000 /* online - PURR/SPURR count */ #define LPCR_PECE 0x0001f000 /* powersave exit cause enable */ #define LPCR_PECEDP 0x00010000 /* directed priv dbells cause exit */ #define LPCR_PECEDH 0x00008000 /* directed hyp dbells cause exit */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e9f4b45432b..d7f2ec6f1419 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -789,8 +789,11 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) /* * Userspace can only modify DPFD (default prefetch depth), * ILE (interrupt little-endian) and TC (translation control). + * On POWER8 userspace can also modify AIL (alt. interrupt loc.) */ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mask |= LPCR_AIL; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -2166,6 +2169,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) LPCR_VPM0 | LPCR_VPM1; kvm->arch.vrma_slb_v = SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); + /* On POWER8 turn on online bit to enable PURR/SPURR */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr |= LPCR_ONL; } kvm->arch.lpcr = lpcr; -- cgit v1.2.3 From 5d00f66b865e3782c5852cdafe1cea11a292a81e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:28 +1100 Subject: KVM: PPC: Book3S HV: Prepare for host using hypervisor doorbells POWER8 has support for hypervisor doorbell interrupts. Though the kernel doesn't use them for IPIs on the powernv platform yet, it probably will in future, so this makes KVM cope gracefully if a hypervisor doorbell interrupt arrives while in a guest. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/kvm/book3s_hv.c | 1 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index dba8fb244100..c3815b1e79e8 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -96,6 +96,7 @@ #define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 #define BOOK3S_INTERRUPT_H_INST_STORAGE 0xe20 #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 +#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d7f2ec6f1419..216049ff7368 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -651,6 +651,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_EXTERNAL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9e89c7577b4a..eae4ab9b9135 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1997,10 +1997,17 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION cmpwi r6, 5 /* privileged doorbell? */ beq 0f + cmpwi r6, 3 /* hypervisor doorbell? */ + beq 3f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, 1 /* anything else, return 1 */ 0: blr + /* hypervisor doorbell */ +3: li r12, BOOK3S_INTERRUPT_H_DOORBELL + li r3, 1 + blr + /* * Determine what sort of external interrupt is pending (if any). * Returns: -- cgit v1.2.3 From 8563bf52d509213e746295341ab52896b562ca5e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:29 +1100 Subject: KVM: PPC: Book3S HV: Add support for DABRX register on POWER7 The DABRX (DABR extension) register on POWER7 processors provides finer control over which accesses cause a data breakpoint interrupt. It contains 3 bits which indicate whether to enable accesses in user, kernel and hypervisor modes respectively to cause data breakpoint interrupts, plus one bit that enables both real mode and virtual mode accesses to cause interrupts. Currently, KVM sets DABRX to allow both kernel and user accesses to cause interrupts while in the guest. This adds support for the guest to specify other values for DABRX. PAPR defines a H_SET_XDABR hcall to allow the guest to set both DABR and DABRX with one call. This adds a real-mode implementation of H_SET_XDABR, which shares most of its code with the existing H_SET_DABR implementation. To support this, we add a per-vcpu field to store the DABRX value plus code to get and set it via the ONE_REG interface. For Linux guests to use this new hcall, userspace needs to add "hcall-xdabr" to the set of strings in the /chosen/hypertas-functions property in the device tree. If userspace does this and then migrates the guest to a host where the kernel doesn't include this patch, then userspace will need to implement H_SET_XDABR by writing the specified DABR value to the DABR using the ONE_REG interface. In that case, the old kernel will set DABRX to DABRX_USER | DABRX_KERNEL. That should still work correctly, at least for Linux guests, since Linux guests cope with getting data breakpoint interrupts in modes that weren't requested by just ignoring the interrupt, and Linux guests never set DABRX_BTI. The other thing this does is to make H_SET_DABR and H_SET_XDABR work on POWER8, which has the DAWR and DAWRX instead of DABR/X. Guests that know about POWER8 should use H_SET_MODE rather than H_SET_[X]DABR, but guests running in POWER7 compatibility mode will still use H_SET_[X]DABR. For them, this adds the logic to convert DABR/X values into DAWR/X values on POWER8. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/reg.h | 18 +++++++++++------- arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 32 ++++++++++++++++++++++++++++++-- 7 files changed, 52 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a30035dd4c26..5056baf574f4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1838,6 +1838,7 @@ registers, find a list below: PPC | KVM_REG_PPC_LPCR | 64 PPC | KVM_REG_PPC_PPR | 64 PPC | KVM_REG_PPC_ARCH_COMPAT 32 + PPC | KVM_REG_PPC_DABRX | 32 PPC | KVM_REG_PPC_TM_GPR0 | 64 ... PPC | KVM_REG_PPC_TM_GPR31 | 64 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 81c92d1d7978..d161bc09153b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -466,6 +466,7 @@ struct kvm_vcpu_arch { ulong uamor; ulong iamr; u32 ctrl; + u32 dabrx; ulong dabr; ulong dawr; ulong dawrx; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 05ecb072540c..adf644a80a3e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -229,16 +229,20 @@ #define CIABR_PRIV_SUPER 2 #define CIABR_PRIV_HYPER 3 #define SPRN_DAWRX 0xBC -#define DAWRX_USER (1UL << 0) -#define DAWRX_KERNEL (1UL << 1) -#define DAWRX_HYP (1UL << 2) +#define DAWRX_USER __MASK(0) +#define DAWRX_KERNEL __MASK(1) +#define DAWRX_HYP __MASK(2) +#define DAWRX_WTI __MASK(3) +#define DAWRX_WT __MASK(4) +#define DAWRX_DR __MASK(5) +#define DAWRX_DW __MASK(6) #define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ #define SPRN_DABR2 0x13D /* e300 */ #define SPRN_DABRX 0x3F7 /* Data Address Breakpoint Register Extension */ -#define DABRX_USER (1UL << 0) -#define DABRX_KERNEL (1UL << 1) -#define DABRX_HYP (1UL << 2) -#define DABRX_BTI (1UL << 3) +#define DABRX_USER __MASK(0) +#define DABRX_KERNEL __MASK(1) +#define DABRX_HYP __MASK(2) +#define DABRX_BTI __MASK(3) #define DABRX_ALL (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER) #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index a586fb9b77bd..a6665be4f3ab 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -554,6 +554,8 @@ struct kvm_get_htab_header { /* Architecture compatibility level */ #define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7) +#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 043900abbbb0..239a857f1141 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); + DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 216049ff7368..eb4eed4a7173 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -812,6 +812,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DABR: *val = get_reg_val(id, vcpu->arch.dabr); break; + case KVM_REG_PPC_DABRX: + *val = get_reg_val(id, vcpu->arch.dabrx); + break; case KVM_REG_PPC_DSCR: *val = get_reg_val(id, vcpu->arch.dscr); break; @@ -967,6 +970,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DABR: vcpu->arch.dabr = set_reg_val(id, *val); break; + case KVM_REG_PPC_DABRX: + vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP; + break; case KVM_REG_PPC_DSCR: vcpu->arch.dscr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index eae4ab9b9135..56299349e94b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -585,7 +585,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ - li r5,3 + lwz r5,VCPU_DABRX(r4) ld r6,VCPU_DABR(r4) mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 @@ -1763,24 +1763,52 @@ hcall_real_table: .long 0 /* 0x11c */ .long 0 /* 0x120 */ .long .kvmppc_h_bulk_remove - hcall_real_table + .long 0 /* 0x128 */ + .long 0 /* 0x12c */ + .long 0 /* 0x130 */ + .long .kvmppc_h_set_xdabr - hcall_real_table hcall_real_table_end: ignore_hdec: mr r4,r9 b fast_guest_return +_GLOBAL(kvmppc_h_set_xdabr) + andi. r0, r5, DABRX_USER | DABRX_KERNEL + beq 6f + li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI + andc. r0, r5, r0 + beq 3f +6: li r3, H_PARAMETER + blr + _GLOBAL(kvmppc_h_set_dabr) + li r5, DABRX_USER | DABRX_KERNEL +3: BEGIN_FTR_SECTION b 2f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) std r4,VCPU_DABR(r3) + stw r5, VCPU_DABRX(r3) + mtspr SPRN_DABRX, r5 /* Work around P7 bug where DABR can get corrupted on mtspr */ 1: mtspr SPRN_DABR,r4 mfspr r5, SPRN_DABR cmpd r4, r5 bne 1b isync -2: li r3,0 + li r3,0 + blr + + /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ +2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW + rlwimi r5, r4, 1, DAWRX_WT + clrrdi r4, r4, 3 + std r4, VCPU_DAWR(r3) + std r5, VCPU_DAWRX(r3) + mtspr SPRN_DAWR, r4 + mtspr SPRN_DAWRX, r5 + li r3, 0 blr _GLOBAL(kvmppc_h_cede) -- cgit v1.2.3 From d682916a381ac7c8eb965c10ab64bc7cc2f18647 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 8 Jan 2014 21:25:30 +1100 Subject: KVM: PPC: Book3S HV: Basic little-endian guest support We create a guest MSR from scratch when delivering exceptions in a few places. Instead of extracting LPCR[ILE] and inserting it into MSR_LE each time, we simply create a new variable intr_msr which contains the entire MSR to use. For a little-endian guest, userspace needs to set the ILE (interrupt little-endian) bit in the LPCR for each vcpu (or at least one vcpu in each virtual core). [paulus@samba.org - removed H_SET_MODE implementation from original version of the patch, and made kvmppc_set_lpcr update vcpu->arch.intr_msr.] Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 22 ++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 +++++---------- 5 files changed, 30 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d161bc09153b..7726a3bc8ff0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -636,6 +636,7 @@ struct kvm_vcpu_arch { spinlock_t tbacct_lock; u64 busy_stolen; u64 busy_preempt; + unsigned long intr_msr; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 239a857f1141..a60a2fdae5bd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -480,6 +480,7 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); + DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index efb8aa544876..22cc895333e6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -262,7 +262,7 @@ int kvmppc_mmu_hv_init(void) static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) { - kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); + kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); } /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index eb4eed4a7173..3195e4f8a2ed 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -787,6 +787,27 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) u64 mask; spin_lock(&vc->lock); + /* + * If ILE (interrupt little-endian) has changed, update the + * MSR_LE bit in the intr_msr for each vcpu in this vcore. + */ + if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *vcpu; + int i; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.vcore != vc) + continue; + if (new_lpcr & LPCR_ILE) + vcpu->arch.intr_msr |= MSR_LE; + else + vcpu->arch.intr_msr &= ~MSR_LE; + } + mutex_unlock(&kvm->lock); + } + /* * Userspace can only modify DPFD (default prefetch depth), * ILE (interrupt little-endian) and TC (translation control). @@ -1155,6 +1176,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; + vcpu->arch.intr_msr = MSR_SF | MSR_ME; kvmppc_mmu_book3s_hv_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 56299349e94b..ecb76357c9d0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -812,8 +812,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 12: mtspr SPRN_SRR0, r10 mr r10,r0 mtspr SPRN_SRR1, r11 - li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11,r11,63 + ld r11, VCPU_INTR_MSR(r4) 5: /* @@ -1551,8 +1550,7 @@ kvmppc_hdsi: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_DATA_STORAGE - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) lwz r8, VCPU_XER(r9) @@ -1621,8 +1619,7 @@ kvmppc_hisi: 1: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 li r10, BOOK3S_INTERRUPT_INST_STORAGE - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) b fast_interrupt_c_return 3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ @@ -1665,8 +1662,7 @@ sc_1_fast_return: mtspr SPRN_SRR0,r10 mtspr SPRN_SRR1,r11 li r10, BOOK3S_INTERRUPT_SYSCALL - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) mr r4,r9 b fast_guest_return @@ -1994,8 +1990,7 @@ machine_check_realmode: beq mc_cont /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ - rotldi r11, r11, 63 + ld r11, VCPU_INTR_MSR(r9) b fast_interrupt_c_return /* -- cgit v1.2.3 From 7b490411c37f7ab7965cbdfe5e3ec28eadb6db5b Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 8 Jan 2014 21:25:32 +1100 Subject: KVM: PPC: Book3S HV: Add new state for transactional memory Add new state for transactional memory (TM) to kvm_vcpu_arch. Also add asm-offset bits that are going to be required. This also moves the existing TFHAR, TFIAR and TEXASR SPRs into a CONFIG_PPC_TRANSACTIONAL_MEM section. This requires some code changes to ensure we still compile with CONFIG_PPC_TRANSACTIONAL_MEM=N. Much of the added the added #ifdefs are removed in a later patch when the bulk of the TM code is added. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras [agraf: fix merge conflict] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 24 +++++++++-- arch/powerpc/kernel/asm-offsets.c | 19 +++++++-- arch/powerpc/kvm/book3s_hv.c | 4 ++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 75 ++++++++++++++++++++++++++++++++- 4 files changed, 114 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7726a3bc8ff0..1eaea2dea174 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -475,9 +475,6 @@ struct kvm_vcpu_arch { ulong ppr; ulong pspb; ulong fscr; - ulong tfhar; - ulong tfiar; - ulong texasr; ulong ebbhr; ulong ebbrr; ulong bescr; @@ -526,6 +523,27 @@ struct kvm_vcpu_arch { u64 siar; u64 sdar; u64 sier; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u64 tfhar; + u64 texasr; + u64 tfiar; + + u32 cr_tm; + u64 lr_tm; + u64 ctr_tm; + u64 amr_tm; + u64 ppr_tm; + u64 dscr_tm; + u64 tar_tm; + + ulong gpr_tm[32]; + + struct thread_fp_state fp_tm; + + struct thread_vr_state vr_tm; + u32 vrsave_tm; /* also USPRG0 */ + +#endif #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a60a2fdae5bd..687f2ebf0cce 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -521,9 +521,6 @@ int main(void) DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); - DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); - DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); - DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); @@ -545,6 +542,22 @@ int main(void) DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); + DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); + DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); + DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); + DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); + DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); + DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); + DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); + DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); + DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); + DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); + DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); + DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); +#endif #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3195e4f8a2ed..f4a4c5c82fb2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -875,6 +875,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: *val = get_reg_val(id, vcpu->arch.tfhar); break; @@ -884,6 +885,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TEXASR: *val = get_reg_val(id, vcpu->arch.texasr); break; +#endif case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -1033,6 +1035,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); break; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM case KVM_REG_PPC_TFHAR: vcpu->arch.tfhar = set_reg_val(id, *val); break; @@ -1042,6 +1045,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TEXASR: vcpu->arch.texasr = set_reg_val(id, *val); break; +#endif case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ecb76357c9d0..dfa144cb199b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -701,13 +701,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_VTB(r4) mtspr SPRN_IC, r5 mtspr SPRN_VTB, r6 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM ld r5, VCPU_TFHAR(r4) ld r6, VCPU_TFIAR(r4) ld r7, VCPU_TEXASR(r4) - ld r8, VCPU_EBBHR(r4) mtspr SPRN_TFHAR, r5 mtspr SPRN_TFIAR, r6 mtspr SPRN_TEXASR, r7 +#endif + ld r8, VCPU_EBBHR(r4) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) @@ -1118,13 +1120,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r5, VCPU_IC(r9) std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM mfspr r5, SPRN_TFHAR mfspr r6, SPRN_TFIAR mfspr r7, SPRN_TEXASR - mfspr r8, SPRN_EBBHR std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) std r7, VCPU_TEXASR(r9) +#endif + mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR mfspr r6, SPRN_BESCR @@ -1497,6 +1501,73 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1: addi r8,r8,16 .endr + /* Save DEC */ + mfspr r5,SPRN_DEC + mftb r6 + extsw r5,r5 + add r5,r5,r6 + std r5,VCPU_DEC_EXPIRES(r9) + +BEGIN_FTR_SECTION + b 8f +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) + /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ + mfmsr r8 + li r0, 1 + rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r8 + + /* Save POWER8-specific registers */ + mfspr r5, SPRN_IAMR + mfspr r6, SPRN_PSPB + mfspr r7, SPRN_FSCR + std r5, VCPU_IAMR(r9) + stw r6, VCPU_PSPB(r9) + std r7, VCPU_FSCR(r9) + mfspr r5, SPRN_IC + mfspr r6, SPRN_VTB + mfspr r7, SPRN_TAR + std r5, VCPU_IC(r9) + std r6, VCPU_VTB(r9) + std r7, VCPU_TAR(r9) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + mfspr r5, SPRN_TFHAR + mfspr r6, SPRN_TFIAR + mfspr r7, SPRN_TEXASR + std r5, VCPU_TFHAR(r9) + std r6, VCPU_TFIAR(r9) + std r7, VCPU_TEXASR(r9) +#endif + mfspr r8, SPRN_EBBHR + std r8, VCPU_EBBHR(r9) + mfspr r5, SPRN_EBBRR + mfspr r6, SPRN_BESCR + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR + std r5, VCPU_EBBRR(r9) + std r6, VCPU_BESCR(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) + mfspr r5, SPRN_TCSCR + mfspr r6, SPRN_ACOP + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT + std r5, VCPU_TCSCR(r9) + std r6, VCPU_ACOP(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +8: + + /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION + mfspr r5,SPRN_AMR + mfspr r6,SPRN_UAMOR + std r5,VCPU_AMR(r9) + std r6,VCPU_UAMOR(r9) + li r6,0 + mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Unset guest mode */ li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) -- cgit v1.2.3 From 4068890931f62752abc3591e7b3736e7537c6dcb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Jan 2014 21:25:36 +1100 Subject: KVM: PPC: Book3S PR: Cope with doorbell interrupts When the PR host is running on a POWER8 machine in POWER8 mode, it will use doorbell interrupts for IPIs. If one of them arrives while we are in the guest, we pop out of the guest with trap number 0xA00, which isn't handled by kvmppc_handle_exit_pr, leading to the following BUG_ON: [ 331.436215] exit_nr=0xa00 | pc=0x1d2c | msr=0x800000000000d032 [ 331.437522] ------------[ cut here ]------------ [ 331.438296] kernel BUG at arch/powerpc/kvm/book3s_pr.c:982! [ 331.439063] Oops: Exception in kernel mode, sig: 5 [#2] [ 331.439819] SMP NR_CPUS=1024 NUMA pSeries [ 331.440552] Modules linked in: tun nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw virtio_net kvm binfmt_misc ibmvscsi scsi_transport_srp scsi_tgt virtio_blk [ 331.447614] CPU: 11 PID: 1296 Comm: qemu-system-ppc Tainted: G D 3.11.7-200.2.fc19.ppc64p7 #1 [ 331.448920] task: c0000003bdc8c000 ti: c0000003bd32c000 task.ti: c0000003bd32c000 [ 331.450088] NIP: d0000000025d6b9c LR: d0000000025d6b98 CTR: c0000000004cfdd0 [ 331.451042] REGS: c0000003bd32f420 TRAP: 0700 Tainted: G D (3.11.7-200.2.fc19.ppc64p7) [ 331.452331] MSR: 800000000282b032 CR: 28004824 XER: 20000000 [ 331.454616] SOFTE: 1 [ 331.455106] CFAR: c000000000848bb8 [ 331.455726] GPR00: d0000000025d6b98 c0000003bd32f6a0 d0000000026017b8 0000000000000032 GPR04: c0000000018627f8 c000000001873208 320d0a3030303030 3030303030643033 GPR08: c000000000c490a8 0000000000000000 0000000000000000 0000000000000002 GPR12: 0000000028004822 c00000000fdc6300 0000000000000000 00000100076ec310 GPR16: 000000002ae343b8 00003ffffd397398 0000000000000000 0000000000000000 GPR20: 00000100076f16f4 00000100076ebe60 0000000000000008 ffffffffffffffff GPR24: 0000000000000000 0000008001041e60 0000000000000000 0000008001040ce8 GPR28: c0000003a2d80000 0000000000000a00 0000000000000001 c0000003a2681810 [ 331.466504] NIP [d0000000025d6b9c] .kvmppc_handle_exit_pr+0x75c/0xa80 [kvm] [ 331.466999] LR [d0000000025d6b98] .kvmppc_handle_exit_pr+0x758/0xa80 [kvm] [ 331.467517] Call Trace: [ 331.467909] [c0000003bd32f6a0] [d0000000025d6b98] .kvmppc_handle_exit_pr+0x758/0xa80 [kvm] (unreliable) [ 331.468553] [c0000003bd32f750] [d0000000025d98f0] kvm_start_lightweight+0xb4/0xc4 [kvm] [ 331.469189] [c0000003bd32f920] [d0000000025d7648] .kvmppc_vcpu_run_pr+0xd8/0x270 [kvm] [ 331.469838] [c0000003bd32f9c0] [d0000000025cf748] .kvmppc_vcpu_run+0xc8/0xf0 [kvm] [ 331.470790] [c0000003bd32fa50] [d0000000025cc19c] .kvm_arch_vcpu_ioctl_run+0x5c/0x1b0 [kvm] [ 331.471401] [c0000003bd32fae0] [d0000000025c4888] .kvm_vcpu_ioctl+0x478/0x730 [kvm] [ 331.472026] [c0000003bd32fc90] [c00000000026192c] .do_vfs_ioctl+0x4dc/0x7a0 [ 331.472561] [c0000003bd32fd80] [c000000000261cc4] .SyS_ioctl+0xd4/0xf0 [ 331.473095] [c0000003bd32fe30] [c000000000009ed8] syscall_exit+0x0/0x98 [ 331.473633] Instruction dump: [ 331.473766] 4bfff9b4 2b9d0800 419efc18 60000000 60420000 3d220000 e8bf11a0 e8df12a8 [ 331.474733] 7fa4eb78 e8698660 48015165 e8410028 <0fe00000> 813f00e4 3ba00000 39290001 [ 331.475386] ---[ end trace 49fc47d994c1f8f2 ]--- [ 331.479817] This fixes the problem by making kvmppc_handle_exit_pr() recognize the interrupt. We also need to jump to the doorbell interrupt handler in book3s_segment.S to handle the interrupt on the way out of the guest. Having done that, there's nothing further to be done in kvmppc_handle_exit_pr(). Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/book3s_segment.S | 2 ++ 3 files changed, 4 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index c3815b1e79e8..8337c33c2eb3 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -91,6 +91,7 @@ #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 #define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 +#define BOOK3S_INTERRUPT_DOORBELL 0xa00 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 #define BOOK3S_INTERRUPT_TRACE 0xd00 #define BOOK3S_INTERRUPT_H_DATA_STORAGE 0xe00 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e82fafdaf880..6a5fc7d53de6 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -827,6 +827,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, /* We're good on these - the host merely wanted to get our attention */ case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: + case BOOK3S_INTERRUPT_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index bc50c97751d3..1e0cc2adfd40 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -361,6 +361,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) beqa BOOK3S_INTERRUPT_DECREMENTER cmpwi r12, BOOK3S_INTERRUPT_PERFMON beqa BOOK3S_INTERRUPT_PERFMON + cmpwi r12, BOOK3S_INTERRUPT_DOORBELL + beqa BOOK3S_INTERRUPT_DOORBELL RFI kvmppc_handler_trampoline_exit_end: -- cgit v1.2.3 From 3405d230b374b6923878b21b8d708d7db1f734ef Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Jan 2014 18:14:28 +1100 Subject: powerpc: Add support for the optimised lockref implementation This commit adds the architecture support required to enable the optimised implementation of lockrefs. That's as simple as defining arch_spin_value_unlocked() and selecting the Kconfig option. We also define cmpxchg64_relaxed(), because the lockref code does not need the cmpxchg to have barrier semantics. Using Linus' test case[1] on one system I see a 4x improvement for the basic enablement, and a further 1.3x for cmpxchg64_relaxed(), for a total of 5.3x vs the baseline. On another system I see more like 2x improvement. [1]: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4 Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/cmpxchg.h | 1 + arch/powerpc/include/asm/spinlock.h | 5 +++++ 3 files changed, 7 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fa395179ddd6..6ca5d5cabeb1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -139,6 +139,7 @@ config PPC select OLD_SIGACTION if PPC32 select HAVE_DEBUG_STACKOVERFLOW select HAVE_IRQ_EXIT_ON_IRQ_STACK + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index e245aab7f191..d463c68fe7f0 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -300,6 +300,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ }) +#define cmpxchg64_relaxed cmpxchg64_local #else #include #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 5f54a744dcc5..5162f8cd18c0 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -54,6 +54,11 @@ #define SYNC_IO #endif +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.slock == 0; +} + /* * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. -- cgit v1.2.3 From 7179ba52889bef7e5e23f72908270e1ab2b7fc6f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Jan 2014 18:14:29 +1100 Subject: powerpc: Implement arch_spin_is_locked() using arch_spin_value_unlocked() At a glance these are just the inverse of each other. The one subtlety is that arch_spin_value_unlocked() takes the lock by value, rather than as a pointer, which is important for the lockref code. On the other hand arch_spin_is_locked() doesn't really care, so implement it in terms of arch_spin_value_unlocked(). Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/spinlock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 5162f8cd18c0..a30ef6999d66 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,8 +28,6 @@ #include #include -#define arch_spin_is_locked(x) ((x)->slock != 0) - #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ #ifdef __BIG_ENDIAN__ @@ -59,6 +57,11 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) return lock.slock == 0; } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + return !arch_spin_value_unlocked(*lock); +} + /* * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. -- cgit v1.2.3 From fd120dc2e205d2318a8b47d6d8098b789e3af67d Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Tue, 28 Jan 2014 17:52:42 +0530 Subject: powerpc/mm: Fix compile error of pgtable-ppc64.h It seems that forward declaration couldn't work well with typedef, use struct spinlock directly to avoiding following build errors: In file included from include/linux/spinlock.h:81, from include/linux/seqlock.h:35, from include/linux/time.h:5, from include/uapi/linux/timex.h:56, from include/linux/timex.h:56, from include/linux/sched.h:17, from arch/powerpc/kernel/asm-offsets.c:17: include/linux/spinlock_types.h:76: error: redefinition of typedef 'spinlock_t' /root/linux-next/arch/powerpc/include/asm/pgtable-ppc64.h:563: note: previous declaration of 'spinlock_t' was here Signed-off-by: Li Zhong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/pgtable-ppc64.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index d27960c89a71..bc141c950b1e 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -560,9 +560,9 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define pmd_move_must_withdraw pmd_move_must_withdraw -typedef struct spinlock spinlock_t; -static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, - spinlock_t *old_pmd_ptl) +struct spinlock; +static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, + struct spinlock *old_pmd_ptl) { /* * Archs like ppc64 use pgtable to store per pmd -- cgit v1.2.3 From 962e7bd4976516c34fc9ef51d536aab801980767 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Tue, 14 Jan 2014 16:26:02 +0530 Subject: powerpc/pseries/cpuidle: Move processor_idle.c to drivers/cpuidle. Move the file from arch specific pseries/processor_idle.c to drivers/cpuidle/cpuidle-pseries.c Make the relevant Makefile and Kconfig changes. Also, introduce Kconfig.powerpc in drivers/cpuidle for all powerpc cpuidle drivers. Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 2 +- arch/powerpc/platforms/pseries/Kconfig | 9 - arch/powerpc/platforms/pseries/Makefile | 1 - arch/powerpc/platforms/pseries/processor_idle.c | 308 -------------------- drivers/cpuidle/Kconfig | 5 + drivers/cpuidle/Kconfig.powerpc | 11 + drivers/cpuidle/Makefile | 4 + drivers/cpuidle/cpuidle-pseries.c | 361 ++++++++++++++++++++++++ 8 files changed, 382 insertions(+), 319 deletions(-) delete mode 100644 arch/powerpc/platforms/pseries/processor_idle.c create mode 100644 drivers/cpuidle/Kconfig.powerpc create mode 100644 drivers/cpuidle/cpuidle-pseries.c (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8ca20ac28dc2..c2c0f4478be3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -451,7 +451,7 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_nap(void); -#ifdef CONFIG_PSERIES_IDLE +#ifdef CONFIG_PSERIES_CPUIDLE extern void update_smt_snooze_delay(int cpu, int residency); #else static inline void update_smt_snooze_delay(int cpu, int residency) {} diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index e66643250fee..37300f6ee244 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -119,12 +119,3 @@ config DTL which are accessible through a debugfs file. Say N if you are unsure. - -config PSERIES_IDLE - bool "Cpuidle driver for pSeries platforms" - depends on CPU_IDLE - depends on PPC_PSERIES - default y - help - Select this option to enable processor idle state management - through cpuidle subsystem. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index fbccac9cd2dc..03480796af9a 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o -obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o obj-$(CONFIG_LPARCFG) += lparcfg.o ifeq ($(CONFIG_PPC_PSERIES),y) diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c deleted file mode 100644 index 002d5b4112f2..000000000000 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * processor_idle - idle state cpuidle driver. - * Adapted from drivers/idle/intel_idle.c and - * drivers/acpi/processor_idle.c - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -struct cpuidle_driver pseries_idle_driver = { - .name = "pseries_idle", - .owner = THIS_MODULE, -}; - -#define MAX_IDLE_STATE_COUNT 2 - -static int max_idle_state = MAX_IDLE_STATE_COUNT - 1; -static struct cpuidle_state *cpuidle_state_table; - -static inline void idle_loop_prolog(unsigned long *in_purr) -{ - *in_purr = mfspr(SPRN_PURR); - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; -} - -static inline void idle_loop_epilog(unsigned long in_purr) -{ - u64 wait_cycles; - - wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); - wait_cycles += mfspr(SPRN_PURR) - in_purr; - get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); - get_lppaca()->idle = 0; -} - -static int snooze_loop(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) -{ - unsigned long in_purr; - int cpu = dev->cpu; - - idle_loop_prolog(&in_purr); - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - - while ((!need_resched()) && cpu_online(cpu)) { - HMT_low(); - HMT_very_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); - - idle_loop_epilog(in_purr); - - return index; -} - -static void check_and_cede_processor(void) -{ - /* - * Ensure our interrupt state is properly tracked, - * also checks if no interrupt has occurred while we - * were soft-disabled - */ - if (prep_irq_for_idle()) { - cede_processor(); -#ifdef CONFIG_TRACE_IRQFLAGS - /* Ensure that H_CEDE returns with IRQs on */ - if (WARN_ON(!(mfmsr() & MSR_EE))) - __hard_irq_enable(); -#endif - } -} - -static int dedicated_cede_loop(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) -{ - unsigned long in_purr; - - idle_loop_prolog(&in_purr); - get_lppaca()->donate_dedicated_cpu = 1; - - HMT_medium(); - check_and_cede_processor(); - - get_lppaca()->donate_dedicated_cpu = 0; - - idle_loop_epilog(in_purr); - - return index; -} - -static int shared_cede_loop(struct cpuidle_device *dev, - struct cpuidle_driver *drv, - int index) -{ - unsigned long in_purr; - - idle_loop_prolog(&in_purr); - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - */ - check_and_cede_processor(); - - idle_loop_epilog(in_purr); - - return index; -} - -/* - * States for dedicated partition case. - */ -static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = { - { /* Snooze */ - .name = "snooze", - .desc = "snooze", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 0, - .target_residency = 0, - .enter = &snooze_loop }, - { /* CEDE */ - .name = "CEDE", - .desc = "CEDE", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 10, - .target_residency = 100, - .enter = &dedicated_cede_loop }, -}; - -/* - * States for shared partition case. - */ -static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = { - { /* Shared Cede */ - .name = "Shared Cede", - .desc = "Shared Cede", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 0, - .target_residency = 0, - .enter = &shared_cede_loop }, -}; - -void update_smt_snooze_delay(int cpu, int residency) -{ - struct cpuidle_driver *drv = cpuidle_get_driver(); - struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); - - if (cpuidle_state_table != dedicated_states) - return; - - if (residency < 0) { - /* Disable the Nap state on that cpu */ - if (dev) - dev->states_usage[1].disable = 1; - } else - if (drv) - drv->states[1].target_residency = residency; -} - -static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - int hotcpu = (unsigned long)hcpu; - struct cpuidle_device *dev = - per_cpu_ptr(cpuidle_devices, hotcpu); - - if (dev && cpuidle_get_driver()) { - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuidle_pause_and_lock(); - cpuidle_enable_device(dev); - cpuidle_resume_and_unlock(); - break; - - case CPU_DEAD: - case CPU_DEAD_FROZEN: - cpuidle_pause_and_lock(); - cpuidle_disable_device(dev); - cpuidle_resume_and_unlock(); - break; - - default: - return NOTIFY_DONE; - } - } - return NOTIFY_OK; -} - -static struct notifier_block setup_hotplug_notifier = { - .notifier_call = pseries_cpuidle_add_cpu_notifier, -}; - -/* - * pseries_cpuidle_driver_init() - */ -static int pseries_cpuidle_driver_init(void) -{ - int idle_state; - struct cpuidle_driver *drv = &pseries_idle_driver; - - drv->state_count = 0; - - for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) { - - if (idle_state > max_idle_state) - break; - - /* is the state not enabled? */ - if (cpuidle_state_table[idle_state].enter == NULL) - continue; - - drv->states[drv->state_count] = /* structure copy */ - cpuidle_state_table[idle_state]; - - drv->state_count += 1; - } - - return 0; -} - -/* - * pseries_idle_probe() - * Choose state table for shared versus dedicated partition - */ -static int pseries_idle_probe(void) -{ - - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) - return -ENODEV; - - if (cpuidle_disable != IDLE_NO_OVERRIDE) - return -ENODEV; - - if (max_idle_state == 0) { - printk(KERN_DEBUG "pseries processor idle disabled.\n"); - return -EPERM; - } - - if (lppaca_shared_proc(get_lppaca())) - cpuidle_state_table = shared_states; - else - cpuidle_state_table = dedicated_states; - - return 0; -} - -static int __init pseries_processor_idle_init(void) -{ - int retval; - - retval = pseries_idle_probe(); - if (retval) - return retval; - - pseries_cpuidle_driver_init(); - retval = cpuidle_register(&pseries_idle_driver, NULL); - if (retval) { - printk(KERN_DEBUG "Registration of pseries driver failed.\n"); - return retval; - } - - register_cpu_notifier(&setup_hotplug_notifier); - printk(KERN_DEBUG "pseries_idle_driver registered\n"); - - return 0; -} - -static void __exit pseries_processor_idle_exit(void) -{ - - unregister_cpu_notifier(&setup_hotplug_notifier); - cpuidle_unregister(&pseries_idle_driver); - - return; -} - -module_init(pseries_processor_idle_init); -module_exit(pseries_processor_idle_exit); - -MODULE_AUTHOR("Deepthi Dharwar "); -MODULE_DESCRIPTION("Cpuidle driver for POWER"); -MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index b3fb81d7cf04..f04e25f6c98d 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -35,6 +35,11 @@ depends on ARM source "drivers/cpuidle/Kconfig.arm" endmenu +menu "POWERPC CPU Idle Drivers" +depends on PPC +source "drivers/cpuidle/Kconfig.powerpc" +endmenu + endif config ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc new file mode 100644 index 000000000000..8147de522a96 --- /dev/null +++ b/drivers/cpuidle/Kconfig.powerpc @@ -0,0 +1,11 @@ +# +# POWERPC CPU Idle Drivers +# +config PSERIES_CPUIDLE + bool "Cpuidle driver for pSeries platforms" + depends on CPU_IDLE + depends on PPC_PSERIES + default y + help + Select this option to enable processor idle state management + through cpuidle subsystem. diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 527be28e5c1e..a6331ad32738 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -13,3 +13,7 @@ obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o + +############################################################################### +# POWERPC drivers +obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c new file mode 100644 index 000000000000..21154782402a --- /dev/null +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -0,0 +1,361 @@ +/* + * cpuidle-pseries - idle state cpuidle driver. + * Adapted from drivers/idle/intel_idle.c and + * drivers/acpi/processor_idle.c + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct cpuidle_driver pseries_idle_driver = { + .name = "pseries_idle", + .owner = THIS_MODULE, +}; + +#define MAX_IDLE_STATE_COUNT 2 + +static int max_idle_state = MAX_IDLE_STATE_COUNT - 1; +static struct cpuidle_device __percpu *pseries_cpuidle_devices; +static struct cpuidle_state *cpuidle_state_table; + +static inline void idle_loop_prolog(unsigned long *in_purr) +{ + *in_purr = mfspr(SPRN_PURR); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline void idle_loop_epilog(unsigned long in_purr) +{ + u64 wait_cycles; + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); + get_lppaca()->idle = 0; +} + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + int cpu = dev->cpu; + + idle_loop_prolog(&in_purr); + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + while ((!need_resched()) && cpu_online(cpu)) { + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + + idle_loop_epilog(in_purr); + + return index; +} + +static void check_and_cede_processor(void) +{ + /* + * Ensure our interrupt state is properly tracked, + * also checks if no interrupt has occurred while we + * were soft-disabled + */ + if (prep_irq_for_idle()) { + cede_processor(); +#ifdef CONFIG_TRACE_IRQFLAGS + /* Ensure that H_CEDE returns with IRQs on */ + if (WARN_ON(!(mfmsr() & MSR_EE))) + __hard_irq_enable(); +#endif + } +} + +static int dedicated_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + get_lppaca()->donate_dedicated_cpu = 1; + + HMT_medium(); + check_and_cede_processor(); + + get_lppaca()->donate_dedicated_cpu = 0; + + idle_loop_epilog(in_purr); + + return index; +} + +static int shared_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + + idle_loop_prolog(&in_purr); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + check_and_cede_processor(); + + idle_loop_epilog(in_purr); + + return index; +} + +/* + * States for dedicated partition case. + */ +static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, + { /* CEDE */ + .name = "CEDE", + .desc = "CEDE", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 100, + .enter = &dedicated_cede_loop }, +}; + +/* + * States for shared partition case. + */ +static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = { + { /* Shared Cede */ + .name = "Shared Cede", + .desc = "Shared Cede", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &shared_cede_loop }, +}; + +void update_smt_snooze_delay(int cpu, int residency) +{ + struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); + + if (cpuidle_state_table != dedicated_states) + return; + + if (residency < 0) { + /* Disable the Nap state on that cpu */ + if (dev) + dev->states_usage[1].disable = 1; + } else + if (drv) + drv->states[1].target_residency = residency; +} + +static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct cpuidle_device *dev = + per_cpu_ptr(pseries_cpuidle_devices, hotcpu); + + if (dev && cpuidle_get_driver()) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_enable_device(dev); + cpuidle_resume_and_unlock(); + break; + + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cpuidle_pause_and_lock(); + cpuidle_disable_device(dev); + cpuidle_resume_and_unlock(); + break; + + default: + return NOTIFY_DONE; + } + } + return NOTIFY_OK; +} + +static struct notifier_block setup_hotplug_notifier = { + .notifier_call = pseries_cpuidle_add_cpu_notifier, +}; + +/* + * pseries_cpuidle_driver_init() + */ +static int pseries_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &pseries_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) { + + if (idle_state > max_idle_state) + break; + + /* is the state not enabled? */ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + drv->state_count += 1; + } + + return 0; +} + +/* pseries_idle_devices_uninit(void) + * unregister cpuidle devices and de-allocate memory + */ +static void pseries_idle_devices_uninit(void) +{ + int i; + struct cpuidle_device *dev; + + for_each_possible_cpu(i) { + dev = per_cpu_ptr(pseries_cpuidle_devices, i); + cpuidle_unregister_device(dev); + } + + free_percpu(pseries_cpuidle_devices); + return; +} + +/* pseries_idle_devices_init() + * allocate, initialize and register cpuidle device + */ +static int pseries_idle_devices_init(void) +{ + int i; + struct cpuidle_driver *drv = &pseries_idle_driver; + struct cpuidle_device *dev; + + pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (pseries_cpuidle_devices == NULL) + return -ENOMEM; + + for_each_possible_cpu(i) { + dev = per_cpu_ptr(pseries_cpuidle_devices, i); + dev->state_count = drv->state_count; + dev->cpu = i; + if (cpuidle_register_device(dev)) { + printk(KERN_DEBUG \ + "cpuidle_register_device %d failed!\n", i); + return -EIO; + } + } + + return 0; +} + +/* + * pseries_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int pseries_idle_probe(void) +{ + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -ENODEV; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + return -ENODEV; + + if (max_idle_state == 0) { + printk(KERN_DEBUG "pseries processor idle disabled.\n"); + return -EPERM; + } + + if (lppaca_shared_proc(get_lppaca())) + cpuidle_state_table = shared_states; + else + cpuidle_state_table = dedicated_states; + + return 0; +} + +static int __init pseries_processor_idle_init(void) +{ + int retval; + + retval = pseries_idle_probe(); + if (retval) + return retval; + + pseries_cpuidle_driver_init(); + retval = cpuidle_register_driver(&pseries_idle_driver); + if (retval) { + printk(KERN_DEBUG "Registration of pseries driver failed.\n"); + return retval; + } + + retval = pseries_idle_devices_init(); + if (retval) { + pseries_idle_devices_uninit(); + cpuidle_unregister_driver(&pseries_idle_driver); + return retval; + } + + register_cpu_notifier(&setup_hotplug_notifier); + printk(KERN_DEBUG "pseries_idle_driver registered\n"); + + return 0; +} + +static void __exit pseries_processor_idle_exit(void) +{ + + unregister_cpu_notifier(&setup_hotplug_notifier); + pseries_idle_devices_uninit(); + cpuidle_unregister_driver(&pseries_idle_driver); + + return; +} + +module_init(pseries_processor_idle_init); +module_exit(pseries_processor_idle_exit); + +MODULE_AUTHOR("Deepthi Dharwar "); +MODULE_DESCRIPTION("Cpuidle driver for POWER"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 3fa8cad82b94d0bed002571bd246f2299ffc876b Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Tue, 14 Jan 2014 16:26:38 +0530 Subject: powerpc/pseries/cpuidle: smt-snooze-delay cleanup. smt-snooze-delay was designed to disable NAP state or delay the entry to the NAP state prior to adoption of cpuidle framework. This is per-cpu variable. With the coming of CPUIDLE framework, states can be disabled on per-cpu basis using the cpuidle/enable sysfs entry. Also, with the coming of cpuidle driver each state's target residency is per-driver unlike earlier which was per-device. Therefore, the per-cpu sysfs smt-snooze-delay which decides the target residency of the idle state on a particular cpu causes more confusion to the user as we cannot have different smt-snooze-delay (target residency) values for each cpu. In the current code, smt-snooze-delay functionality is completely broken. It makes sense to remove smt-snooze-delay from idle driver with the coming of cpuidle framework. However, sysfs files are retained as ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean up the kernel code. Signed-off-by: Deepthi Dharwar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 7 ------- arch/powerpc/kernel/sysfs.c | 2 -- drivers/cpuidle/cpuidle-pseries.c | 17 ----------------- 3 files changed, 26 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c2c0f4478be3..b62de43ae5f3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -450,13 +450,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern void power7_nap(void); - -#ifdef CONFIG_PSERIES_CPUIDLE -extern void update_smt_snooze_delay(int cpu, int residency); -#else -static inline void update_smt_snooze_delay(int cpu, int residency) {} -#endif - extern void flush_instruction_cache(void); extern void hard_reset_now(void); extern void poweroff_now(void); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index d4a43e64a6a9..97e1dc917683 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; - update_smt_snooze_delay(cpu->dev.id, snooze); - return count; } diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index bb56091685d3..7ab564aa0b1c 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -162,23 +162,6 @@ static struct cpuidle_state shared_states[] = { .enter = &shared_cede_loop }, }; -void update_smt_snooze_delay(int cpu, int residency) -{ - struct cpuidle_driver *drv = cpuidle_get_driver(); - struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu); - - if (cpuidle_state_table != dedicated_states) - return; - - if (residency < 0) { - /* Disable the Nap state on that cpu */ - if (dev) - dev->states_usage[1].disable = 1; - } else - if (drv) - drv->states[1].target_residency = residency; -} - static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n, unsigned long action, void *hcpu) { -- cgit v1.2.3 From f878f84373aefda7f041a74b24a83b8b7dec1cf0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 29 Jan 2014 17:13:05 +1100 Subject: powerpc: Wire up sched_setattr and sched_getattr syscalls Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 2 ++ arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 43523fe0d8b4..3ddf70276706 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -359,3 +359,5 @@ COMPAT_SYS(process_vm_readv) COMPAT_SYS(process_vm_writev) SYSCALL(finit_module) SYSCALL(ni_syscall) /* sys_kcmp */ +SYSCALL_SPU(sched_setattr) +SYSCALL_SPU(sched_getattr) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3ca819f541bf..4494f029b632 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 355 +#define __NR_syscalls 357 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 74cb4d72d673..881bf2e2560d 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -377,6 +377,7 @@ #define __NR_process_vm_writev 352 #define __NR_finit_module 353 #define __NR_kcmp 354 - +#define __NR_sched_setattr 355 +#define __NR_sched_getattr 356 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3