diff options
Diffstat (limited to 'arch/powerpc')
24 files changed, 236 insertions, 282 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index feab3bad6d0f..8a01098eaaca 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -87,10 +87,6 @@ config ARCH_HAS_ILOG2_U64 bool default y if 64BIT -config ARCH_HAS_CPU_IDLE_WAIT - bool - default y - config GENERIC_HWEIGHT bool default y @@ -141,9 +137,10 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_BPF_JIT if (PPC64 && NET) + select HAVE_BPF_JIT if PPC64 select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG + select GENERIC_SMP_IDLE_THREAD config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 548da3aa0a30..d58fc4e4149c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -288,13 +288,6 @@ label##_hv: \ /* Exception addition: Hard disable interrupts */ #define DISABLE_INTS SOFT_DISABLE_INTS(r10,r11) -/* Exception addition: Keep interrupt state */ -#define ENABLE_INTS \ - ld r11,PACAKMSR(r13); \ - ld r12,_MSR(r1); \ - rlwimi r11,r12,0,MSR_EE; \ - mtmsrd r11,1 - #define ADD_NVGPRS \ bl .save_nvgprs diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index aa795ccef294..fd07f43d6622 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -81,12 +81,13 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 vsid_next; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; + u32 vsid_next; #else - u64 vsid_first; - u64 vsid_max; + u64 proto_vsid_first; + u64 proto_vsid_max; + u64 proto_vsid_next; #endif int context_id[SID_CONTEXTS]; diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8e2d0371fe1e..48a26d379222 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -386,7 +386,6 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -void cpu_idle_wait(void); #ifdef CONFIG_PSERIES_IDLE extern void update_smt_snooze_delay(int snooze); diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 4a741c7efd02..1a1bb00f061a 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -62,21 +62,8 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) -/* thread information allocation */ - -#if THREAD_SHIFT >= PAGE_SHIFT - #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) -#else /* THREAD_SHIFT < PAGE_SHIFT */ - -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - -extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node); -extern void free_thread_info(struct thread_info *ti); - -#endif /* THREAD_SHIFT < PAGE_SHIFT */ - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f5808a35688c..83afacd3ba7b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -28,7 +28,7 @@ endif obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o systbl.o idle.o \ + process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o dma.o \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index f8a7a1a1a9f4..ef2074c3e906 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -588,23 +588,19 @@ _GLOBAL(ret_from_except_lite) fast_exc_return_irq: restore: /* - * This is the main kernel exit path, we first check if we - * have to change our interrupt state. + * This is the main kernel exit path. First we check if we + * are about to re-enable interrupts */ ld r5,SOFTE(r1) lbz r6,PACASOFTIRQEN(r13) - cmpwi cr1,r5,0 - cmpw cr0,r5,r6 - beq cr0,4f + cmpwi cr0,r5,0 + beq restore_irq_off - /* We do, handle disable first, which is easy */ - bne cr1,3f; - li r0,0 - stb r0,PACASOFTIRQEN(r13); - TRACE_DISABLE_INTS - b 4f + /* We are enabling, were we already enabled ? Yes, just return */ + cmpwi cr0,r6,1 + beq cr0,do_restore -3: /* + /* * We are about to soft-enable interrupts (we are hard disabled * at this point). We check if there's anything that needs to * be replayed first. @@ -626,7 +622,7 @@ restore_no_replay: /* * Final return path. BookE is handled in a different file */ -4: +do_restore: #ifdef CONFIG_PPC_BOOK3E b .exception_return_book3e #else @@ -700,6 +696,25 @@ fast_exception_return: #endif /* CONFIG_PPC_BOOK3E */ /* + * We are returning to a context with interrupts soft disabled. + * + * However, we may also about to hard enable, so we need to + * make sure that in this case, we also clear PACA_IRQ_HARD_DIS + * or that bit can get out of sync and bad things will happen + */ +restore_irq_off: + ld r3,_MSR(r1) + lbz r7,PACAIRQHAPPENED(r13) + andi. r0,r3,MSR_EE + beq 1f + rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS + stb r7,PACAIRQHAPPENED(r13) +1: li r0,0 + stb r0,PACASOFTIRQEN(r13); + TRACE_DISABLE_INTS + b do_restore + + /* * Something did happen, check if a re-emit is needed * (this also clears paca->irq_happened) */ @@ -748,6 +763,9 @@ restore_check_irq_replay: #endif /* CONFIG_PPC_BOOK3E */ 1: b .ret_from_except /* What else to do here ? */ + + +3: do_work: #ifdef CONFIG_PREEMPT andi. r0,r3,MSR_PR /* Returning to user mode? */ @@ -767,16 +785,6 @@ do_work: SOFT_DISABLE_INTS(r3,r4) 1: bl .preempt_schedule_irq - /* Hard-disable interrupts again (and update PACA) */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 0 -#else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ - mtmsrd r10,1 -#endif /* CONFIG_PPC_BOOK3E */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - /* Re-test flags and eventually loop */ clrrdi r9,r1,THREAD_SHIFT ld r4,TI_FLAGS(r9) @@ -787,14 +795,6 @@ do_work: user_work: #endif /* CONFIG_PREEMPT */ - /* Enable interrupts */ -#ifdef CONFIG_PPC_BOOK3E - wrteei 1 -#else - ori r10,r10,MSR_EE - mtmsrd r10,1 -#endif /* CONFIG_PPC_BOOK3E */ - andi. r0,r4,_TIF_NEED_RESCHED beq 1f bl .restore_interrupts diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cb705fdbb458..8f880bc77c56 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -768,8 +768,8 @@ alignment_common: std r3,_DAR(r1) std r4,_DSISR(r1) bl .save_nvgprs + DISABLE_INTS addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS bl .alignment_exception b .ret_from_except diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 6d2209ac0c44..2099d9a879e8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -113,29 +113,6 @@ void cpu_idle(void) } } - -/* - * cpu_idle_wait - Used to ensure that all the CPUs come out of the old - * idle loop and start using the new idle loop. - * Required while changing idle handler on SMP systems. - * Caller must have changed idle handler to the new value before the call. - * This window may be larger on shared systems. - */ -void cpu_idle_wait(void) -{ - int cpu; - smp_mb(); - - /* kick all the CPUs so that they exit out of old idle routine */ - get_online_cpus(); - for_each_online_cpu(cpu) { - if (cpu != smp_processor_id()) - smp_send_reschedule(cpu); - } - put_online_cpus(); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - int powersave_nap; #ifdef CONFIG_SYSCTL diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c deleted file mode 100644 index d076d465dbd1..000000000000 --- a/arch/powerpc/kernel/init_task.c +++ /dev/null @@ -1,29 +0,0 @@ -#include <linux/mm.h> -#include <linux/export.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> -#include <asm/uaccess.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial thread structure. - * - * We need to make sure that this is 16384-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 43eb74fcedde..641da9e868ce 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -229,6 +229,19 @@ notrace void arch_local_irq_restore(unsigned long en) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); +#ifdef CONFIG_TRACE_IRQFLAG + else { + /* + * We should already be hard disabled here. We had bugs + * where that wasn't the case so let's dbl check it and + * warn if we are wrong. Only do that when IRQ tracing + * is enabled as mfmsr() can be costly. + */ + if (WARN_ON(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } +#endif /* CONFIG_TRACE_IRQFLAG */ + set_soft_enabled(0); /* @@ -260,11 +273,17 @@ EXPORT_SYMBOL(arch_local_irq_restore); * if they are currently disabled. This is typically called before * schedule() or do_signal() when returning to userspace. We do it * in C to avoid the burden of dealing with lockdep etc... + * + * NOTE: This is called with interrupts hard disabled but not marked + * as such in paca->irq_happened, so we need to resync this. */ void restore_interrupts(void) { - if (irqs_disabled()) + if (irqs_disabled()) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; local_irq_enable(); + } else + __hard_irq_enable(); } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4937c9690090..aa05935b6947 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1252,37 +1252,6 @@ void __ppc64_runlatch_off(void) } #endif /* CONFIG_PPC64 */ -#if THREAD_SHIFT < PAGE_SHIFT - -static struct kmem_cache *thread_info_cache; - -struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) -{ - struct thread_info *ti; - - ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node); - if (unlikely(ti == NULL)) - return NULL; -#ifdef CONFIG_DEBUG_STACK_USAGE - memset(ti, 0, THREAD_SIZE); -#endif - return ti; -} - -void free_thread_info(struct thread_info *ti) -{ - kmem_cache_free(thread_info_cache, ti); -} - -void thread_info_cache_init(void) -{ - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, - THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); -} - -#endif /* THREAD_SHIFT < PAGE_SHIFT */ - unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8d8e028893be..dd5e214cdf21 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1710,7 +1710,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) { long ret = 0; - secure_computing(regs->gpr[0]); + secure_computing_strict(regs->gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d9f94410fd7f..e4cb34322de4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,27 +57,9 @@ #define DBG(fmt...) #endif - -/* Store all idle threads, this can be reused instead of creating -* a new thread. Also avoids complicated thread destroy functionality -* for idle threads. -*/ #ifdef CONFIG_HOTPLUG_CPU -/* - * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is - * removed after init for !CONFIG_HOTPLUG_CPU. - */ -static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); -#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) -#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) - /* State of each CPU during hotplug phases */ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; - -#else -static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif struct thread_info *secondary_ti; @@ -429,60 +411,19 @@ int generic_check_cpu_restart(unsigned int cpu) } #endif -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -static void __cpuinit do_fork_idle(struct work_struct *work) +static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) { - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - -static int __cpuinit create_idle(unsigned int cpu) -{ - struct thread_info *ti; - struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), - }; - INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); - - c_idle.idle = get_idle_for_cpu(cpu); - - /* We can't use kernel_thread since we must avoid to - * reschedule the child. We use a workqueue because - * we want to fork from a kernel thread, not whatever - * userspace process happens to be trying to online us. - */ - if (!c_idle.idle) { - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - } else - init_idle(c_idle.idle, cpu); - if (IS_ERR(c_idle.idle)) { - pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle)); - return PTR_ERR(c_idle.idle); - } - ti = task_thread_info(c_idle.idle); + struct thread_info *ti = task_thread_info(idle); #ifdef CONFIG_PPC64 - paca[cpu].__current = c_idle.idle; + paca[cpu].__current = idle; paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; #endif ti->cpu = cpu; - current_set[cpu] = ti; - - return 0; + secondary_ti = current_set[cpu] = ti; } -int __cpuinit __cpu_up(unsigned int cpu) +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc, c; @@ -490,12 +431,7 @@ int __cpuinit __cpu_up(unsigned int cpu) (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; - /* Make sure we have an idle thread */ - rc = create_idle(cpu); - if (rc) - return rc; - - secondary_ti = current_set[cpu]; + cpu_idle_thread_init(cpu, tidle); /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6aa0c663e247..158972341a2d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -248,7 +248,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) addr, regs->nip, regs->link, code); } - if (!arch_irq_disabled_regs(regs)) + if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) local_irq_enable(); memset(&info, 0, sizeof(info)); @@ -1019,7 +1019,9 @@ void __kprobes program_check_exception(struct pt_regs *regs) return; } - local_irq_enable(); + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); #ifdef CONFIG_MATH_EMULATION /* (reason & REASON_ILLEGAL) would be the obvious thing here, @@ -1069,6 +1071,10 @@ void alignment_exception(struct pt_regs *regs) { int sig, code, fixed = 0; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) fixed = fix_alignment(regs); diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 6f87f39a1ac2..10fc8ec9d2a8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -194,14 +194,14 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) backwards_map = !backwards_map; /* Uh-oh ... out of mappings. Let's flush! */ - if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) { - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; + if (vcpu_book3s->proto_vsid_next == vcpu_book3s->proto_vsid_max) { + vcpu_book3s->proto_vsid_next = vcpu_book3s->proto_vsid_first; memset(vcpu_book3s->sid_map, 0, sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); } - map->host_vsid = vcpu_book3s->vsid_next++; + map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M); map->guest_vsid = gvsid; map->valid = true; @@ -319,9 +319,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) return -1; vcpu3s->context_id[0] = err; - vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1; - vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; - vcpu3s->vsid_next = vcpu3s->vsid_first; + vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) + << USER_ESID_BITS) - 1; + vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; + vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index ddc485a529f2..c3beaeef3f60 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -258,6 +258,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, !(memslot->userspace_addr & (s - 1))) { start &= ~(s - 1); pgsize = s; + get_page(hpage); + put_page(page); page = hpage; } } @@ -281,11 +283,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, err = 0; out: - if (got) { - if (PageHuge(page)) - page = compound_head(page); + if (got) put_page(page); - } return err; up_err: @@ -678,8 +677,15 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, SetPageDirty(page); out_put: - if (page) - put_page(page); + if (page) { + /* + * We drop pages[0] here, not page because page might + * have been set to the head page of a compound, but + * we have to drop the reference on the correct tail + * page to match the get inside gup() + */ + put_page(pages[0]); + } return ret; out_unlock: @@ -979,6 +985,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, pa = *physp; } page = pfn_to_page(pa >> PAGE_SHIFT); + get_page(page); } else { hva = gfn_to_hva_memslot(memslot, gfn); npages = get_user_pages_fast(hva, 1, 1, pages); @@ -991,8 +998,6 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, page = compound_head(page); psize <<= compound_order(page); } - if (!kvm->arch.using_mmu_notifiers) - get_page(page); offset = gpa & (psize - 1); if (nb_ret) *nb_ret = psize - offset; @@ -1003,7 +1008,6 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) { struct page *page = virt_to_page(va); - page = compound_head(page); put_page(page); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 01294a5099dd..108d1f580177 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1192,8 +1192,6 @@ static void unpin_slot(struct kvm *kvm, int slot_id) continue; pfn = physp[j] >> PAGE_SHIFT; page = pfn_to_page(pfn); - if (PageHuge(page)) - page = compound_head(page); SetPageDirty(page); put_page(page); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index def880aea63a..cec4daddbf31 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -463,6 +463,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); + hp[0] = 0; continue; } diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 0676ae249b9f..6e6e9cef34a8 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -197,7 +197,8 @@ kvmppc_interrupt: /* Save guest PC and MSR */ #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION - andi. r0,r12,0x2 + andi. r0, r12, 0x2 + cmpwi cr1, r0, 0 beq 1f mfspr r3,SPRN_HSRR0 mfspr r4,SPRN_HSRR1 @@ -250,6 +251,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) beq ld_last_prev_inst cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT beq- ld_last_inst +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + cmpwi r12, BOOK3S_INTERRUPT_H_EMUL_ASSIST + beq- ld_last_inst +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif b no_ld_last_inst @@ -316,23 +323,17 @@ no_dcbz32_off: * Having set up SRR0/1 with the address where we want * to continue with relocation on (potentially in module * space), we either just go straight there with rfi[d], - * or we jump to an interrupt handler with bctr if there - * is an interrupt to be handled first. In the latter - * case, the rfi[d] at the end of the interrupt handler - * will get us back to where we want to continue. + * or we jump to an interrupt handler if there is an + * interrupt to be handled first. In the latter case, + * the rfi[d] at the end of the interrupt handler will + * get us back to where we want to continue. */ - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL - beq 1f - cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER - beq 1f - cmpwi r12, BOOK3S_INTERRUPT_PERFMON -1: mtctr r12 - /* Register usage at this point: * * R1 = host R1 * R2 = host R2 + * R10 = raw exit handler id * R12 = exit handler id * R13 = shadow vcpu (32-bit) or PACA (64-bit) * SVCPU.* = guest * @@ -342,12 +343,25 @@ no_dcbz32_off: PPC_LL r6, HSTATE_HOST_MSR(r13) PPC_LL r8, HSTATE_VMHANDLER(r13) - /* Restore host msr -> SRR1 */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + beq cr1, 1f + mtspr SPRN_HSRR1, r6 + mtspr SPRN_HSRR0, r8 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif +1: /* Restore host msr -> SRR1 */ mtsrr1 r6 /* Load highmem handler address */ mtsrr0 r8 /* RFI into the highmem handler, or jump to interrupt handler */ - beqctr + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beqa BOOK3S_INTERRUPT_EXTERNAL + cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER + beqa BOOK3S_INTERRUPT_DECREMENTER + cmpwi r12, BOOK3S_INTERRUPT_PERFMON + beqa BOOK3S_INTERRUPT_PERFMON + RFI kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb05b123218f..1a6de0a7d8eb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -271,7 +271,8 @@ int alloc_bootmem_huge_page(struct hstate *hstate) unsigned long gpage_npages[MMU_PAGE_COUNT]; -static int __init do_gpage_early_setup(char *param, char *val) +static int __init do_gpage_early_setup(char *param, char *val, + const char *unused) { static phys_addr_t size; unsigned long npages; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index af1ab5e9a691..5c3cf2d04e41 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -48,7 +48,13 @@ /* * Assembly helpers from arch/powerpc/net/bpf_jit.S: */ -extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +#define DECLARE_LOAD_FUNC(func) \ + extern u8 func[], func##_negative_offset[], func##_positive_offset[] + +DECLARE_LOAD_FUNC(sk_load_word); +DECLARE_LOAD_FUNC(sk_load_half); +DECLARE_LOAD_FUNC(sk_load_byte); +DECLARE_LOAD_FUNC(sk_load_byte_msh); #define FUNCTION_DESCR_SIZE 24 diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S index ff4506e85cce..55ba3855a97f 100644 --- a/arch/powerpc/net/bpf_jit_64.S +++ b/arch/powerpc/net/bpf_jit_64.S @@ -31,14 +31,13 @@ * then branch directly to slow_path_XXX if required. (In fact, could * load a spare GPR with the address of slow_path_generic and pass size * as an argument, making the call site a mtlr, li and bllr.) - * - * Technically, the "is addr < 0" check is unnecessary & slowing down - * the ABS path, as it's statically checked on generation. */ .globl sk_load_word sk_load_word: cmpdi r_addr, 0 - blt bpf_error + blt bpf_slow_path_word_neg + .globl sk_load_word_positive_offset +sk_load_word_positive_offset: /* Are we accessing past headlen? */ subi r_scratch1, r_HL, 4 cmpd r_scratch1, r_addr @@ -51,7 +50,9 @@ sk_load_word: .globl sk_load_half sk_load_half: cmpdi r_addr, 0 - blt bpf_error + blt bpf_slow_path_half_neg + .globl sk_load_half_positive_offset +sk_load_half_positive_offset: subi r_scratch1, r_HL, 2 cmpd r_scratch1, r_addr blt bpf_slow_path_half @@ -61,7 +62,9 @@ sk_load_half: .globl sk_load_byte sk_load_byte: cmpdi r_addr, 0 - blt bpf_error + blt bpf_slow_path_byte_neg + .globl sk_load_byte_positive_offset +sk_load_byte_positive_offset: cmpd r_HL, r_addr ble bpf_slow_path_byte lbzx r_A, r_D, r_addr @@ -69,22 +72,20 @@ sk_load_byte: /* * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf) - * r_addr is the offset value, already known positive + * r_addr is the offset value */ .globl sk_load_byte_msh sk_load_byte_msh: + cmpdi r_addr, 0 + blt bpf_slow_path_byte_msh_neg + .globl sk_load_byte_msh_positive_offset +sk_load_byte_msh_positive_offset: cmpd r_HL, r_addr ble bpf_slow_path_byte_msh lbzx r_X, r_D, r_addr rlwinm r_X, r_X, 2, 32-4-2, 31-2 blr -bpf_error: - /* Entered with cr0 = lt */ - li r3, 0 - /* Generated code will 'blt epilogue', returning 0. */ - blr - /* Call out to skb_copy_bits: * We'll need to back up our volatile regs first; we have * local variable space at r1+(BPF_PPC_STACK_BASIC). @@ -136,3 +137,84 @@ bpf_slow_path_byte_msh: lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) rlwinm r_X, r_X, 2, 32-4-2, 31-2 blr + +/* Call out to bpf_internal_load_pointer_neg_helper: + * We'll need to back up our volatile regs first; we have + * local variable space at r1+(BPF_PPC_STACK_BASIC). + * Allocate a new stack frame here to remain ABI-compliant in + * stashing LR. + */ +#define sk_negative_common(SIZE) \ + mflr r0; \ + std r0, 16(r1); \ + /* R3 goes in parameter space of caller's frame */ \ + std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \ + std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \ + std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \ + stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ + /* R3 = r_skb, as passed */ \ + mr r4, r_addr; \ + li r5, SIZE; \ + bl bpf_internal_load_pointer_neg_helper; \ + /* R3 != 0 on success */ \ + addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ + ld r0, 16(r1); \ + ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \ + ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \ + mtlr r0; \ + cmpldi r3, 0; \ + beq bpf_error_slow; /* cr0 = EQ */ \ + mr r_addr, r3; \ + ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \ + /* Great success! */ + +bpf_slow_path_word_neg: + lis r_scratch1,-32 /* SKF_LL_OFF */ + cmpd r_addr, r_scratch1 /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + .globl sk_load_word_negative_offset +sk_load_word_negative_offset: + sk_negative_common(4) + lwz r_A, 0(r_addr) + blr + +bpf_slow_path_half_neg: + lis r_scratch1,-32 /* SKF_LL_OFF */ + cmpd r_addr, r_scratch1 /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + .globl sk_load_half_negative_offset +sk_load_half_negative_offset: + sk_negative_common(2) + lhz r_A, 0(r_addr) + blr + +bpf_slow_path_byte_neg: + lis r_scratch1,-32 /* SKF_LL_OFF */ + cmpd r_addr, r_scratch1 /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + .globl sk_load_byte_negative_offset +sk_load_byte_negative_offset: + sk_negative_common(1) + lbz r_A, 0(r_addr) + blr + +bpf_slow_path_byte_msh_neg: + lis r_scratch1,-32 /* SKF_LL_OFF */ + cmpd r_addr, r_scratch1 /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + .globl sk_load_byte_msh_negative_offset +sk_load_byte_msh_negative_offset: + sk_negative_common(1) + lbz r_X, 0(r_addr) + rlwinm r_X, r_X, 2, 32-4-2, 31-2 + blr + +bpf_error_slow: + /* fabricate a cr0 = lt */ + li r_scratch1, -1 + cmpdi r_scratch1, 0 +bpf_error: + /* Entered with cr0 = lt */ + li r3, 0 + /* Generated code will 'blt epilogue', returning 0. */ + blr diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 73619d3aeb6c..2dc8b1484845 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) PPC_BLR(); } +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) + /* Assemble the body code between the prologue & epilogue. */ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, struct codegen_context *ctx, @@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, /*** Absolute loads from packet header/data ***/ case BPF_S_LD_W_ABS: - func = sk_load_word; + func = CHOOSE_LOAD_FUNC(K, sk_load_word); goto common_load; case BPF_S_LD_H_ABS: - func = sk_load_half; + func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; case BPF_S_LD_B_ABS: - func = sk_load_byte; + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); common_load: - /* - * Load from [K]. Reference with the (negative) - * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported. - */ + /* Load from [K]. */ ctx->seen |= SEEN_DATAREF; - if ((int)K < 0) - return -ENOTSUPP; PPC_LI64(r_scratch1, func); PPC_MTLR(r_scratch1); PPC_LI32(r_addr, K); @@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, common_load_ind: /* * Load from [X + K]. Negative offsets are tested for - * in the helper functions, and result in a 'ret 0'. + * in the helper functions. */ ctx->seen |= SEEN_DATAREF | SEEN_XREG; PPC_LI64(r_scratch1, func); @@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; case BPF_S_LDX_B_MSH: - /* - * x86 version drops packet (RET 0) when K<0, whereas - * interpreter does allow K<0 (__load_pointer, special - * ancillary data). common_load returns ENOTSUPP if K<0, - * so we fall back to interpreter & filter works. - */ - func = sk_load_byte_msh; + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); goto common_load; break; |