diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/Makefile | 20 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 13 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_emulate.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 533 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 156 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_hmi.c | 56 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_xics.c | 120 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 197 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 10 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 57 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu.c | 73 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 61 | ||||
-rw-r--r-- | arch/powerpc/kvm/trace_hv.h | 22 |
16 files changed, 1024 insertions, 305 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index c2024ac9d4e8..029be26b5a17 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -22,6 +22,9 @@ config KVM select ANON_INODES select HAVE_KVM_EVENTFD select SRCU + select KVM_VFIO + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS config KVM_BOOK3S_HANDLER bool diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1f9e5529e692..7dd89b79d038 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm KVM := ../../../virt/kvm -common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ - $(KVM)/eventfd.o +common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o +common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o emulate_loadstore.o +common-objs-y += powerpc.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o @@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj) kvm-e500-objs := \ $(common-objs-y) \ + emulate.o \ booke.o \ booke_emulate.o \ booke_interrupts.o \ @@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) kvm-e500mc-objs := \ $(common-objs-y) \ + emulate.o \ booke.o \ booke_emulate.o \ bookehv_interrupts.o \ @@ -61,9 +63,6 @@ kvm-pr-y := \ book3s_32_mmu.o ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE -kvm-book3s_64-module-objs := \ - $(KVM)/coalesced_mmio.o - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o endif @@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_hv_hmi.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ @@ -88,11 +88,8 @@ endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o -kvm-book3s_64-module-objs += \ - $(KVM)/kvm_main.o \ - $(KVM)/eventfd.o \ - powerpc.o \ - emulate_loadstore.o \ +kvm-book3s_64-module-objs := \ + $(common-objs-y) \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ @@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) kvm-book3s_32-objs := \ $(common-objs-y) \ + emulate.o \ fpu.o \ book3s_paired_singles.o \ book3s.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 47018fcbf7d6..b6952dd23152 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "queue_intr", VCPU_STAT(queue_intr) }, + { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, + { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, + { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, + { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, @@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ld_slow", VCPU_STAT(ld_slow) }, { "st", VCPU_STAT(st) }, { "st_slow", VCPU_STAT(st_slow) }, + { "pthru_all", VCPU_STAT(pthru_all) }, + { "pthru_host", VCPU_STAT(pthru_host) }, + { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, { NULL } }; @@ -592,9 +599,6 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: *val = get_reg_val(id, vcpu->arch.bescr); break; - case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; @@ -666,9 +670,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2afdb9c0937d..8359752b3efc 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -498,6 +498,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: #endif break; unprivileged: @@ -579,7 +580,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = vcpu->arch.spurr; break; case SPRN_VTB: - *spr_val = vcpu->arch.vtb; + *spr_val = to_book3s(vcpu)->vtb; break; case SPRN_IC: *spr_val = vcpu->arch.ic; @@ -640,6 +641,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: case SPRN_TIR: #endif *spr_val = 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2fd5580c8f6e..3686471be32b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,11 +53,15 @@ #include <asm/smp.h> #include <asm/dbell.h> #include <asm/hmi.h> +#include <asm/pnv-pci.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/hugetlb.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> #include <linux/module.h> +#include <linux/compiler.h> #include "book3s.h" @@ -70,6 +74,8 @@ /* Used to indicate that a guest page fault needs to be handled */ #define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) +/* Used to indicate that a guest passthrough interrupt needs to be handled */ +#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2) /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) @@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); + module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); + +/* Factor by which the vcore halt poll interval is grown, default is to double + */ +static unsigned int halt_poll_ns_grow = 2; +module_param(halt_poll_ns_grow, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); + +/* Factor by which the vcore halt poll interval is shrunk, default is to reset + */ +static unsigned int halt_poll_ns_shrink; +module_param(halt_poll_ns_shrink, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, + int *ip) +{ + int i = *ip; + struct kvm_vcpu *vcpu; + + while (++i < MAX_SMT_THREADS) { + vcpu = READ_ONCE(vc->runnable_threads[i]); + if (vcpu) { + *ip = i; + return vcpu; + } + } + return NULL; +} + +/* Used to traverse the list of runnable threads for a given vcore */ +#define for_each_runnable_thread(i, vcpu, vc) \ + for (i = -1; (vcpu = next_runnable_thread(vc, &i)); ) + static bool kvmppc_ipi_thread(int cpu) { /* On POWER8 for IPIs to threads in the same core, use msgsnd */ @@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HV_RM_HARD: + r = RESUME_PASSTHROUGH; + break; default: kvmppc_dump_regs(vcpu); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", @@ -1149,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vcore->vtb); + break; case KVM_REG_PPC_DAWR: *val = get_reg_val(id, vcpu->arch.dawr); break; @@ -1341,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vcore->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_DAWR: vcpu->arch.dawr = set_reg_val(id, *val); break; @@ -1493,7 +1549,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) if (vcore == NULL) return NULL; - INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); init_swait_queue_head(&vcore->wq); @@ -1802,7 +1857,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; spin_unlock_irq(&vcpu->arch.tbacct_lock); --vc->n_runnable; - list_del(&vcpu->arch.run_list); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL); } static int kvmppc_grab_hwthread(int cpu) @@ -2048,66 +2103,6 @@ static void init_master_vcore(struct kvmppc_vcore *vc) vc->conferring_threads = 0; } -/* - * See if the existing subcores can be split into 3 (or fewer) subcores - * of at most two threads each, so we can fit in another vcore. This - * assumes there are at most two subcores and at most 6 threads in total. - */ -static bool can_split_piggybacked_subcores(struct core_info *cip) -{ - int sub, new_sub; - int large_sub = -1; - int thr; - int n_subcores = cip->n_subcores; - struct kvmppc_vcore *vc, *vcnext; - struct kvmppc_vcore *master_vc = NULL; - - for (sub = 0; sub < cip->n_subcores; ++sub) { - if (cip->subcore_threads[sub] <= 2) - continue; - if (large_sub >= 0) - return false; - large_sub = sub; - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - if (vc->num_threads > 2) - return false; - n_subcores += (cip->subcore_threads[sub] - 1) >> 1; - } - if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2)) - return false; - - /* - * Seems feasible, so go through and move vcores to new subcores. - * Note that when we have two or more vcores in one subcore, - * all those vcores must have only one thread each. - */ - new_sub = cip->n_subcores; - thr = 0; - sub = large_sub; - list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) { - if (thr >= 2) { - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]); - /* vc->num_threads must be 1 */ - if (++cip->subcore_threads[new_sub] == 1) { - cip->subcore_vm[new_sub] = vc->kvm; - init_master_vcore(vc); - master_vc = vc; - ++cip->n_subcores; - } else { - vc->master_vcore = master_vc; - ++new_sub; - } - } - thr += vc->num_threads; - } - cip->subcore_threads[large_sub] = 2; - cip->max_subcore_threads = 2; - - return true; -} - static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) { int n_threads = vc->num_threads; @@ -2118,23 +2113,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (n_threads < cip->max_subcore_threads) n_threads = cip->max_subcore_threads; - if (subcore_config_ok(cip->n_subcores + 1, n_threads)) { - cip->max_subcore_threads = n_threads; - } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 && - vc->num_threads <= 2) { - /* - * We may be able to fit another subcore in by - * splitting an existing subcore with 3 or 4 - * threads into two 2-thread subcores, or one - * with 5 or 6 threads into three subcores. - * We can only do this if those subcores have - * piggybacked virtual cores. - */ - if (!can_split_piggybacked_subcores(cip)) - return false; - } else { + if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) return false; - } + cip->max_subcore_threads = n_threads; sub = cip->n_subcores; ++cip->n_subcores; @@ -2148,43 +2129,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) return true; } -static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, - struct core_info *cip, int sub) -{ - struct kvmppc_vcore *vc; - int n_thr; - - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - - /* require same VM and same per-core reg values */ - if (pvc->kvm != vc->kvm || - pvc->tb_offset != vc->tb_offset || - pvc->pcr != vc->pcr || - pvc->lpcr != vc->lpcr) - return false; - - /* P8 guest with > 1 thread per core would see wrong TIR value */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - (vc->num_threads > 1 || pvc->num_threads > 1)) - return false; - - n_thr = cip->subcore_threads[sub] + pvc->num_threads; - if (n_thr > cip->max_subcore_threads) { - if (!subcore_config_ok(cip->n_subcores, n_thr)) - return false; - cip->max_subcore_threads = n_thr; - } - - cip->total_threads += pvc->num_threads; - cip->subcore_threads[sub] = n_thr; - pvc->master_vcore = vc; - list_del(&pvc->preempt_list); - list_add_tail(&pvc->preempt_list, &cip->vcs[sub]); - - return true; -} - /* * Work out whether it is possible to piggyback the execution of * vcore *pvc onto the execution of the other vcores described in *cip. @@ -2192,27 +2136,18 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, int target_threads) { - int sub; - if (cip->total_threads + pvc->num_threads > target_threads) return false; - for (sub = 0; sub < cip->n_subcores; ++sub) - if (cip->subcore_threads[sub] && - can_piggyback_subcore(pvc, cip, sub)) - return true; - - if (can_dynamic_split(pvc, cip)) - return true; - return false; + return can_dynamic_split(pvc, cip); } static void prepare_threads(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + int i; + struct kvm_vcpu *vcpu; - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; else if (vcpu->arch.vpa.update_pending || @@ -2259,15 +2194,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { - int still_running = 0; + int still_running = 0, i; u64 now; long ret; - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; spin_lock(&vc->lock); now = get_tb(); - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { /* cancel pending dec exception if dec is positive */ if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) @@ -2307,8 +2241,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) } if (vc->n_runnable > 0 && vc->runner == NULL) { /* make sure there's a candidate runner awake */ - vcpu = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + vcpu = next_runnable_thread(vc, &i); wake_up(&vcpu->arch.cpu_run); } } @@ -2361,7 +2295,7 @@ static inline void kvmppc_set_host_core(int cpu) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; int i; int srcu_idx; struct core_info core_info; @@ -2397,8 +2331,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); @@ -2477,8 +2410,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) active |= 1 << thr; list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { pvc->pcpu = pcpu + thr; - list_for_each_entry(vcpu, &pvc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, pvc) { kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -2604,34 +2536,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, finish_wait(&vcpu->arch.cpu_run, &wait); } +static void grow_halt_poll_ns(struct kvmppc_vcore *vc) +{ + /* 10us base */ + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) + vc->halt_poll_ns = 10000; + else + vc->halt_poll_ns *= halt_poll_ns_grow; + + if (vc->halt_poll_ns > halt_poll_max_ns) + vc->halt_poll_ns = halt_poll_max_ns; +} + +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) +{ + if (halt_poll_ns_shrink == 0) + vc->halt_poll_ns = 0; + else + vc->halt_poll_ns /= halt_poll_ns_shrink; +} + +/* Check to see if any of the runnable vcpus on the vcore have pending + * exceptions or are no longer ceded + */ +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu; + int i; + + for_each_runnable_thread(i, vcpu, vc) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) + return 1; + } + + return 0; +} + /* * All the vcpus in this vcore are idle, so wait for a decrementer * or external interrupt to one of the vcpus. vc->lock is held. */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + ktime_t cur, start_poll, start_wait; int do_sleep = 1; + u64 block_ns; DECLARE_SWAITQUEUE(wait); - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + /* Poll for pending exceptions and ceded state */ + cur = start_poll = ktime_get(); + if (vc->halt_poll_ns) { + ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns); + ++vc->runner->stat.halt_attempted_poll; - /* - * Check one last time for pending exceptions and ceded state after - * we put ourselves on the wait queue - */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { - do_sleep = 0; - break; + vc->vcore_state = VCORE_POLLING; + spin_unlock(&vc->lock); + + do { + if (kvmppc_vcore_check_block(vc)) { + do_sleep = 0; + break; + } + cur = ktime_get(); + } while (single_task_running() && ktime_before(cur, stop)); + + spin_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; + + if (!do_sleep) { + ++vc->runner->stat.halt_successful_poll; + goto out; } } - if (!do_sleep) { + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); - return; + do_sleep = 0; + /* If we polled, count this as a successful poll */ + if (vc->halt_poll_ns) + ++vc->runner->stat.halt_successful_poll; + goto out; } + start_wait = ktime_get(); + vc->vcore_state = VCORE_SLEEPING; trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); @@ -2640,13 +2630,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); + ++vc->runner->stat.halt_successful_wait; + + cur = ktime_get(); + +out: + block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll); + + /* Attribute wait time */ + if (do_sleep) { + vc->runner->stat.halt_wait_ns += + ktime_to_ns(cur) - ktime_to_ns(start_wait); + /* Attribute failed poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_fail_ns += + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll); + } else { + /* Attribute successful poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_success_ns += + ktime_to_ns(cur) - + ktime_to_ns(start_poll); + } + + /* Adjust poll time */ + if (halt_poll_max_ns) { + if (block_ns <= vc->halt_poll_ns) + ; + /* We slept and blocked for longer than the max halt time */ + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + shrink_halt_poll_ns(vc); + /* We slept and our poll time is too small */ + else if (vc->halt_poll_ns < halt_poll_max_ns && + block_ns < halt_poll_max_ns) + grow_halt_poll_ns(vc); + } else + vc->halt_poll_ns = 0; + + trace_kvmppc_vcore_wakeup(do_sleep, block_ns); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded; + int n_ceded, i; struct kvmppc_vcore *vc; - struct kvm_vcpu *v, *vn; + struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); @@ -2666,7 +2695,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; - list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu); ++vc->n_runnable; /* @@ -2706,8 +2735,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); continue; } - list_for_each_entry_safe(v, vn, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); @@ -2720,7 +2748,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) break; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { + for_each_runnable_thread(i, v, vc) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; else @@ -2759,8 +2787,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { /* Wake up some vcpu to run the core */ - v = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + v = next_runnable_thread(vc, &i); wake_up(&v->arch.cpu_run); } @@ -2818,7 +2846,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - } + } else if (r == RESUME_PASSTHROUGH) + r = kvmppc_xics_rm_complete(vcpu, 0); } while (is_kvmppc_resume_guest(r)); out: @@ -3247,6 +3276,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvmppc_free_vcores(kvm); kvmppc_free_hpt(kvm); + + kvmppc_free_pimap(kvm); } /* We don't need to emulate any privileged instructions or dcbz */ @@ -3282,6 +3313,184 @@ static int kvmppc_core_check_processor_compat_hv(void) return 0; } +#ifdef CONFIG_KVM_XICS + +void kvmppc_free_pimap(struct kvm *kvm) +{ + kfree(kvm->arch.pimap); +} + +static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void) +{ + return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL); +} + +static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_irq_map *irq_map; + struct kvmppc_passthru_irqmap *pimap; + struct irq_chip *chip; + int i; + + if (!kvm_irq_bypass) + return 1; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + pimap = kvm->arch.pimap; + if (pimap == NULL) { + /* First call, allocate structure to hold IRQ map */ + pimap = kvmppc_alloc_pimap(); + if (pimap == NULL) { + mutex_unlock(&kvm->lock); + return -ENOMEM; + } + kvm->arch.pimap = pimap; + } + + /* + * For now, we only support interrupts for which the EOI operation + * is an OPAL call followed by a write to XIRR, since that's + * what our real-mode EOI code does. + */ + chip = irq_data_get_irq_chip(&desc->irq_data); + if (!chip || !is_pnv_opal_msi(chip)) { + pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", + host_irq, guest_gsi); + mutex_unlock(&kvm->lock); + return -ENOENT; + } + + /* + * See if we already have an entry for this guest IRQ number. + * If it's mapped to a hardware IRQ number, that's an error, + * otherwise re-use this entry. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) { + if (pimap->mapped[i].r_hwirq) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + break; + } + } + + if (i == KVMPPC_PIRQ_MAPPED) { + mutex_unlock(&kvm->lock); + return -EAGAIN; /* table is full */ + } + + irq_map = &pimap->mapped[i]; + + irq_map->v_hwirq = guest_gsi; + irq_map->desc = desc; + + /* + * Order the above two stores before the next to serialize with + * the KVM real mode handler. + */ + smp_wmb(); + irq_map->r_hwirq = desc->irq_data.hwirq; + + if (i == pimap->n_mapped) + pimap->n_mapped++; + + kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + + mutex_unlock(&kvm->lock); + + return 0; +} + +static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_passthru_irqmap *pimap; + int i; + + if (!kvm_irq_bypass) + return 0; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + if (kvm->arch.pimap == NULL) { + mutex_unlock(&kvm->lock); + return 0; + } + pimap = kvm->arch.pimap; + + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) + break; + } + + if (i == pimap->n_mapped) { + mutex_unlock(&kvm->lock); + return -ENODEV; + } + + kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); + + /* invalidate the entry */ + pimap->mapped[i].r_hwirq = 0; + + /* + * We don't free this structure even when the count goes to + * zero. The structure is freed when we destroy the VM. + */ + + mutex_unlock(&kvm->lock); + return 0; +} + +static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret = 0; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = prod; + + ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); + + return ret; +} + +static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = NULL; + + /* + * When producer of consumer is unregistered, we change back to + * default external interrupt handling mode - KVM real mode + * will switch back to host. + */ + ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); +} +#endif + static long kvm_arch_vm_ioctl_hv(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3400,6 +3609,10 @@ static struct kvmppc_ops kvm_ops_hv = { .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, .hcall_implemented = kvmppc_hcall_impl_hv, +#ifdef CONFIG_KVM_XICS + .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, + .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, +#endif }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 5f0380db3eab..0c84d6bc8356 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -25,6 +25,7 @@ #include <asm/xics.h> #include <asm/dbell.h> #include <asm/cputhreads.h> +#include <asm/io.h> #define KVM_CMA_CHUNK_ORDER 18 @@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap) struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); + +#ifdef CONFIG_KVM_XICS +static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, + u32 xisr) +{ + int i; + + /* + * We access the mapped array here without a lock. That + * is safe because we never reduce the number of entries + * in the array and we never change the v_hwirq field of + * an entry once it is set. + * + * We have also carefully ordered the stores in the writer + * and the loads here in the reader, so that if we find a matching + * hwirq here, the associated GSI and irq_desc fields are valid. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (xisr == pimap->mapped[i].r_hwirq) { + /* + * Order subsequent reads in the caller to serialize + * with the writer. + */ + smp_rmb(); + return &pimap->mapped[i]; + } + } + return NULL; +} + +/* + * If we have an interrupt that's not an IPI, check if we have a + * passthrough adapter and if so, check if this external interrupt + * is for the adapter. + * We will attempt to deliver the IRQ directly to the target VCPU's + * ICP, the virtual ICP (based on affinity - the xive value in ICS). + * + * If the delivery fails or if this is not for a passthrough adapter, + * return to the host to handle this interrupt. We earlier + * saved a copy of the XIRR in the PACA, it will be picked up by + * the host ICP driver. + */ +static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + struct kvmppc_passthru_irqmap *pimap; + struct kvmppc_irq_map *irq_map; + struct kvm_vcpu *vcpu; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (!vcpu) + return 1; + pimap = kvmppc_get_passthru_irqmap(vcpu->kvm); + if (!pimap) + return 1; + irq_map = get_irqmap(pimap, xisr); + if (!irq_map) + return 1; + + /* We're handling this interrupt, generic code doesn't need to */ + local_paca->kvm_hstate.saved_xirr = 0; + + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); +} + +#else +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + return 1; +} +#endif + +/* + * Determine what sort of external interrupt is pending (if any). + * Returns: + * 0 if no interrupt is pending + * 1 if an interrupt is pending that needs to be handled by the host + * 2 Passthrough that needs completion in the host + * -1 if there was a guest wakeup IPI (which has now been cleared) + * -2 if there is PCI passthrough external interrupt that was handled + */ + +long kvmppc_read_intr(void) +{ + unsigned long xics_phys; + u32 h_xirr; + __be32 xirr; + u32 xisr; + u8 host_ipi; + + /* see if a host IPI is pending */ + host_ipi = local_paca->kvm_hstate.host_ipi; + if (host_ipi) + return 1; + + /* Now read the interrupt from the ICP */ + xics_phys = local_paca->kvm_hstate.xics_phys; + if (unlikely(!xics_phys)) + return 1; + + /* + * Save XIRR for later. Since we get control in reverse endian + * on LE systems, save it byte reversed and fetch it back in + * host endian. Note that xirr is the value read from the + * XIRR register, while h_xirr is the host endian version. + */ + xirr = _lwzcix(xics_phys + XICS_XIRR); + h_xirr = be32_to_cpu(xirr); + local_paca->kvm_hstate.saved_xirr = h_xirr; + xisr = h_xirr & 0xffffff; + /* + * Ensure that the store/load complete to guarantee all side + * effects of loading from XIRR has completed + */ + smp_mb(); + + /* if nothing pending in the ICP */ + if (!xisr) + return 0; + + /* We found something in the ICP... + * + * If it is an IPI, clear the MFRR and EOI it. + */ + if (xisr == XICS_IPI) { + _stbcix(xics_phys + XICS_MFRR, 0xff); + _stwcix(xics_phys + XICS_XIRR, xirr); + /* + * Need to ensure side effects of above stores + * complete before proceeding. + */ + smp_mb(); + + /* + * We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + host_ipi = local_paca->kvm_hstate.host_ipi; + if (unlikely(host_ipi != 0)) { + /* We raced with the host, + * we need to resend that IPI, bummer + */ + _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + /* Let side effects complete */ + smp_mb(); + return 1; + } + + /* OK, it's an IPI for us */ + local_paca->kvm_hstate.saved_xirr = 0; + return -1; + } + + return kvmppc_check_passthru(xisr, xirr); +} diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c new file mode 100644 index 000000000000..e3f738eb1cac --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -0,0 +1,56 @@ +/* + * Hypervisor Maintenance Interrupt (HMI) handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> + */ + +#undef DEBUG + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/paca.h> +#include <asm/hmi.h> + +void wait_for_subcore_guest_exit(void) +{ + int i; + + /* + * NULL bitmap pointer indicates that KVM module hasn't + * been loaded yet and hence no guests are running. + * If no KVM is in use, no need to co-ordinate among threads + * as all of them will always be in host and no one is going + * to modify TB other than the opal hmi handler. + * Hence, just return from here. + */ + if (!local_paca->sibling_subcore_state) + return; + + for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) + while (local_paca->sibling_subcore_state->in_guest[i]) + cpu_relax(); +} + +void wait_for_tb_resync(void) +{ + if (!local_paca->sibling_subcore_state) + return; + + while (test_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + cpu_relax(); +} diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 980d8a6f7284..82ff5de8b1e7 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/kvm_host.h> #include <linux/err.h> +#include <linux/kernel_stat.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> @@ -18,7 +19,10 @@ #include <asm/debug.h> #include <asm/synch.h> #include <asm/cputhreads.h> +#include <asm/pgtable.h> #include <asm/ppc-opcode.h> +#include <asm/pnv-pci.h> +#include <asm/opal.h> #include "book3s_xics.h" @@ -26,9 +30,12 @@ int h_ipi_redirect = 1; EXPORT_SYMBOL(h_ipi_redirect); +int kvm_irq_bypass = 1; +EXPORT_SYMBOL(kvm_irq_bypass); static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); /* -- ICS routines -- */ static void ics_rm_check_resend(struct kvmppc_xics *xics, @@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_NOTIFY_EOI; icp->rm_eoied_irq = irq; } + + if (state->host_irq) { + ++vcpu->stat.pthru_all; + if (state->intr_cpu != -1) { + int pcpu = raw_smp_processor_id(); + + pcpu = cpu_first_thread_sibling(pcpu); + ++vcpu->stat.pthru_host; + if (state->intr_cpu != pcpu) { + ++vcpu->stat.pthru_bad_aff; + xics_opal_rm_set_server(state->host_irq, pcpu); + } + state->intr_cpu = -1; + } + } bail: return check_too_hard(xics, icp); } +unsigned long eoi_rc; + +static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +{ + unsigned long xics_phys; + int64_t rc; + + rc = pnv_opal_pci_msi_eoi(c, hwirq); + + if (rc) + eoi_rc = rc; + + iosync(); + + /* EOI it */ + xics_phys = local_paca->kvm_hstate.xics_phys; + _stwcix(xics_phys + XICS_XIRR, xirr); +} + +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) +{ + unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; + + return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); +} + +/* + * Increment a per-CPU 32-bit unsigned integer variable. + * Safe to call in real-mode. Handles vmalloc'ed addresses + * + * ToDo: Make this work for any integral type + */ + +static inline void this_cpu_inc_rm(unsigned int __percpu *addr) +{ + unsigned long l; + unsigned int *raddr; + int cpu = smp_processor_id(); + + raddr = per_cpu_ptr(addr, cpu); + l = (unsigned long)raddr; + + if (REGION_ID(l) == VMALLOC_REGION_ID) { + l = vmalloc_to_phys(raddr); + raddr = (unsigned int *)l; + } + ++*raddr; +} + +/* + * We don't try to update the flags in the irq_desc 'istate' field in + * here as would happen in the normal IRQ handling path for several reasons: + * - state flags represent internal IRQ state and are not expected to be + * updated outside the IRQ subsystem + * - more importantly, these are useful for edge triggered interrupts, + * IRQ probing, etc., but we are only handling MSI/MSIx interrupts here + * and these states shouldn't apply to us. + * + * However, we do update irq_stats - we somewhat duplicate the code in + * kstat_incr_irqs_this_cpu() for this since this function is defined + * in irq/internal.h which we don't want to include here. + * The only difference is that desc->kstat_irqs is an allocated per CPU + * variable and could have been vmalloc'ed, so we can't directly + * call __this_cpu_inc() on it. The kstat structure is a static + * per CPU variable and it should be accessible by real-mode KVM. + * + */ +static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) +{ + this_cpu_inc_rm(desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} + +long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, + u32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap) +{ + struct kvmppc_xics *xics; + struct kvmppc_icp *icp; + u32 irq; + + irq = irq_map->v_hwirq; + xics = vcpu->kvm->arch.xics; + icp = vcpu->arch.icp; + + kvmppc_rm_handle_irq_desc(irq_map->desc); + icp_rm_deliver_irq(xics, icp, irq); + + /* EOI the interrupt */ + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + + if (check_too_hard(xics, icp) == H_TOO_HARD) + return 2; + else + return -2; +} + /* --- Non-real mode XICS-related built-in routines --- */ /** diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 975655573844..c3c1d1bcfc67 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -221,6 +221,13 @@ kvmppc_primary_no_guest: li r3, 0 /* Don't wake on privileged (OS) doorbell */ b kvm_do_nap +/* + * kvm_novcpu_wakeup + * Entered from kvm_start_guest if kvm_hstate.napping is set + * to NAPPING_NOVCPU + * r2 = kernel TOC + * r13 = paca + */ kvm_novcpu_wakeup: ld r1, HSTATE_HOST_R1(r13) ld r5, HSTATE_KVM_VCORE(r13) @@ -230,6 +237,13 @@ kvm_novcpu_wakeup: /* check the wake reason */ bl kvmppc_check_wake_reason + /* + * Restore volatile registers since we could have called + * a C routine in kvmppc_check_wake_reason. + * r5 = VCORE + */ + ld r5, HSTATE_KVM_VCORE(r13) + /* see if any other thread is already exiting */ lwz r0, VCORE_ENTRY_EXIT(r5) cmpwi r0, 0x100 @@ -322,6 +336,11 @@ kvm_start_guest: /* Check the wake reason in SRR1 to see why we got here */ bl kvmppc_check_wake_reason + /* + * kvmppc_check_wake_reason could invoke a C routine, but we + * have no volatile registers to restore when we return. + */ + cmpdi r3, 0 bge kvm_no_guest @@ -625,9 +644,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) 38: BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ ld r8, VCORE_DPDES(r5) + ld r7, VCORE_VTB(r5) mtspr SPRN_DPDES, r8 + mtspr SPRN_VTB, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Mark the subcore state as inside guest */ @@ -787,10 +808,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) - ld r6, VCPU_VTB(r4) - mtspr SPRN_IC, r5 - mtspr SPRN_VTB, r6 ld r8, VCPU_EBBHR(r4) + mtspr SPRN_IC, r5 mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) @@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) cmpwi r3, 512 /* 1 microsecond */ blt hdec_soon +deliver_guest_interrupt: ld r6, VCPU_CTR(r4) ld r7, VCPU_XER(r4) @@ -895,7 +915,6 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 -deliver_guest_interrupt: /* r11 = vcpu->arch.msr & ~MSR_HV */ rldicl r11, r11, 63 - MSR_HV_LG, 1 rotldi r11, r11, 1 + MSR_HV_LG @@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * set, we know the host wants us out so let's do it now */ bl kvmppc_read_intr + + /* + * Restore the active volatile registers after returning from + * a C function. + */ + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_EXTERNAL + + /* + * kvmppc_read_intr return codes: + * + * Exit to host (r3 > 0) + * 1 An interrupt is pending that needs to be handled by the host + * Exit guest and return to host by branching to guest_exit_cont + * + * 2 Passthrough that needs completion in the host + * Exit guest and return to host by branching to guest_exit_cont + * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD + * to indicate to the host to complete handling the interrupt + * + * Before returning to guest, we check if any CPU is heading out + * to the host and if so, we head out also. If no CPUs are heading + * check return values <= 0. + * + * Return to guest (r3 <= 0) + * 0 No external interrupt is pending + * -1 A guest wakeup IPI (which has now been cleared) + * In either case, we return to guest to deliver any pending + * guest interrupts. + * + * -2 A PCI passthrough external interrupt was handled + * (interrupt was delivered directly to guest) + * Return to guest to deliver any pending guest interrupts. + */ + + cmpdi r3, 1 + ble 1f + + /* Return code = 2 */ + li r12, BOOK3S_INTERRUPT_HV_RM_HARD + stw r12, VCPU_TRAP(r9) + b guest_exit_cont + +1: /* Return code <= 1 */ cmpdi r3, 0 bgt guest_exit_cont - /* Check if any CPU is heading out to the host, if so head out too */ + /* Return code <= 0 */ 4: ld r5, HSTATE_KVM_VCORE(r13) lwz r0, VCORE_ENTRY_EXIT(r5) cmpwi r0, 0x100 @@ -1271,10 +1334,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) stw r6, VCPU_PSPB(r9) std r7, VCPU_FSCR(r9) mfspr r5, SPRN_IC - mfspr r6, SPRN_VTB mfspr r7, SPRN_TAR std r5, VCPU_IC(r9) - std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) @@ -1501,9 +1562,11 @@ kvmhv_switch_to_host: isync BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ mfspr r7, SPRN_DPDES + mfspr r8, SPRN_VTB std r7, VCORE_DPDES(r5) + std r8, VCORE_VTB(r5) /* clear DPDES so we don't get guest doorbells in the host */ li r8, 0 mtspr SPRN_DPDES, r8 @@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) ld r29, VCPU_GPR(R29)(r4) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) - + /* Check the wake reason in SRR1 to see why we got here */ bl kvmppc_check_wake_reason + /* + * Restore volatile registers since we could have called a + * C routine in kvmppc_check_wake_reason + * r4 = VCPU + * r3 tells us whether we need to return to host or not + * WARNING: it gets checked further down: + * should not modify r3 until this check is done. + */ + ld r4, HSTATE_KVM_VCPU(r13) + /* clear our bit in vcore->napping_threads */ 34: ld r5,HSTATE_KVM_VCORE(r13) lbz r7,HSTATE_PTID(r13) @@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) li r0,0 stb r0,HSTATE_NAPPING(r13) - /* See if the wake reason means we need to exit */ + /* See if the wake reason saved in r3 means we need to exit */ stw r12, VCPU_TRAP(r4) mr r9, r4 cmpdi r3, 0 @@ -2297,10 +2370,14 @@ machine_check_realmode: * 0 if nothing needs to be done * 1 if something happened that needs to be handled by the host * -1 if there was a guest wakeup (IPI or msgsnd) + * -2 if we handled a PCI passthrough interrupt (returned by + * kvmppc_read_intr only) * * Also sets r12 to the interrupt vector for any interrupt that needs * to be handled now by the host (0x500 for external interrupt), or zero. - * Modifies r0, r6, r7, r8. + * Modifies all volatile registers (since it may call a C function). + * This routine calls kvmppc_read_intr, a C function, if an external + * interrupt is pending. */ kvmppc_check_wake_reason: mfspr r6, SPRN_SRR1 @@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) cmpwi r6, 8 /* was it an external interrupt? */ - li r12, BOOK3S_INTERRUPT_EXTERNAL - beq kvmppc_read_intr /* if so, see what it was */ + beq 7f /* if so, see what it was */ li r3, 0 li r12, 0 cmpwi r6, 6 /* was it the decrementer? */ @@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, 1 blr -/* - * Determine what sort of external interrupt is pending (if any). - * Returns: - * 0 if no interrupt is pending - * 1 if an interrupt is pending that needs to be handled by the host - * -1 if there was a guest wakeup IPI (which has now been cleared) - * Modifies r0, r6, r7, r8, returns value in r3. - */ -kvmppc_read_intr: - /* see if a host IPI is pending */ - li r3, 1 - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne 1f + /* external interrupt - create a stack frame so we can call C */ +7: mflr r0 + std r0, PPC_LR_STKOFF(r1) + stdu r1, -PPC_MIN_STKFRM(r1) + bl kvmppc_read_intr + nop + li r12, BOOK3S_INTERRUPT_EXTERNAL + cmpdi r3, 1 + ble 1f - /* Now read the interrupt from the ICP */ - ld r6, HSTATE_XICS_PHYS(r13) - li r7, XICS_XIRR - cmpdi r6, 0 - beq- 1f - lwzcix r0, r6, r7 /* - * Save XIRR for later. Since we get in in reverse endian on LE - * systems, save it byte reversed and fetch it back in host endian. - */ - li r3, HSTATE_SAVED_XIRR - STWX_BE r0, r3, r13 -#ifdef __LITTLE_ENDIAN__ - lwz r3, HSTATE_SAVED_XIRR(r13) -#else - mr r3, r0 -#endif - rlwinm. r3, r3, 0, 0xffffff - sync - beq 1f /* if nothing pending in the ICP */ - - /* We found something in the ICP... - * - * If it's not an IPI, stash it in the PACA and return to - * the host, we don't (yet) handle directing real external - * interrupts directly to the guest + * Return code of 2 means PCI passthrough interrupt, but + * we need to return back to host to complete handling the + * interrupt. Trap reason is expected in r12 by guest + * exit code. */ - cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ - bne 42f - - /* It's an IPI, clear the MFRR and EOI it */ - li r3, 0xff - li r8, XICS_MFRR - stbcix r3, r6, r8 /* clear the IPI */ - stwcix r0, r6, r7 /* EOI it */ - sync - - /* We need to re-check host IPI now in case it got set in the - * meantime. If it's clear, we bounce the interrupt to the - * guest - */ - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne- 43f - - /* OK, it's an IPI for us */ - li r12, 0 - li r3, -1 -1: blr - -42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in - * the PACA earlier, it will be picked up by the host ICP driver - */ - li r3, 1 - b 1b - -43: /* We raced with the host, we need to resend that IPI, bummer */ - li r0, IPI_PRIORITY - stbcix r0, r6, r8 /* set the IPI */ - sync - li r3, 1 - b 1b + li r12, BOOK3S_INTERRUPT_HV_RM_HARD +1: + ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1) + addi r1, r1, PPC_MIN_STKFRM + mtlr r0 + blr /* * Save away FP, VMX and VSX registers. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76f79a45988..826c541a12af 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -226,7 +226,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb; if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; @@ -448,6 +448,8 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER7: case PVR_POWER7p: case PVR_POWER8: + case PVR_POWER8E: + case PVR_POWER8NVL: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; break; @@ -1361,6 +1363,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, to_book3s(vcpu)->vtb); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: /* @@ -1397,6 +1402,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; + case KVM_REG_PPC_VTB: + to_book3s(vcpu)->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 05aa11399a78..3bdc639157c1 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) return 0; } + /* Record which CPU this arrived on for passed-through interrupts */ + if (state->host_irq) + state->intr_cpu = raw_smp_processor_id(); + /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); @@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) return H_SUCCESS; } -static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) +int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) { struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_icp *icp = vcpu->arch.icp; @@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete); int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) { @@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); /* -- Initialisation code etc. -- */ +static void xics_debugfs_irqmap(struct seq_file *m, + struct kvmppc_passthru_irqmap *pimap) +{ + int i; + + if (!pimap) + return; + seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n", + pimap->n_mapped); + for (i = 0; i < pimap->n_mapped; i++) { + seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n", + pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq); + } +} + static int xics_debug_show(struct seq_file *m, void *private) { struct kvmppc_xics *xics = m->private; @@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private) t_check_resend = 0; t_reject = 0; + xics_debugfs_irqmap(m, kvm->arch.pimap); + seq_printf(m, "=========\nICP state\n=========\n"); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvmppc_xics *xics = kvm->arch.xics; + if (!xics) + return -ENODEV; return ics_deliver_irq(xics, irq, level); } @@ -1418,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { return pin; } + +void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = host_irq; + ics->irq_state[idx].intr_cpu = -1; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped); + +void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped); diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index a46b954055c4..2a50320b55ca 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -42,6 +42,8 @@ struct ics_irq_state { u8 lsi; /* level-sensitive interrupt */ u8 asserted; /* Only for LSI */ u8 exists; + int intr_cpu; + u32 host_irq; }; /* Atomic ICP state, updated with a single compare & swap */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 02b4672f7347..df3f2706d3e5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -2038,7 +2038,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (type == KVMPPC_DEBUG_NONE) continue; - if (type & !(KVMPPC_DEBUG_WATCH_READ | + if (type & ~(KVMPPC_DEBUG_WATCH_READ | KVMPPC_DEBUG_WATCH_WRITE | KVMPPC_DEBUG_BREAKPOINT)) return -EINVAL; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 29911a07bcdb..ddbf8f0284c0 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, char *virt; struct page **pages; struct tlbe_priv *privs[2] = {}; - u64 *g2h_bitmap = NULL; + u64 *g2h_bitmap; size_t array_len; u32 sets; int num_pages, ret, i; @@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - cfg->array / PAGE_SIZE; - pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); if (ret < 0) - goto err_pages; + goto free_pages; if (ret != num_pages) { num_pages = ret; ret = -EFAULT; - goto err_put_page; + goto put_pages; } virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); if (!virt) { ret = -ENOMEM; - goto err_put_page; + goto put_pages; } - privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], - GFP_KERNEL); - privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], - GFP_KERNEL); + privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL); + if (!privs[0]) { + ret = -ENOMEM; + goto put_pages; + } - if (!privs[0] || !privs[1]) { + privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL); + if (!privs[1]) { ret = -ENOMEM; - goto err_privs; + goto free_privs_first; } - g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], - GFP_KERNEL); + g2h_bitmap = kcalloc(params.tlb_sizes[1], + sizeof(*g2h_bitmap), + GFP_KERNEL); if (!g2h_bitmap) { ret = -ENOMEM; - goto err_privs; + goto free_privs_second; } free_gtlb(vcpu_e500); @@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; - -err_privs: - kfree(privs[0]); + free_privs_second: kfree(privs[1]); - -err_put_page: + free_privs_first: + kfree(privs[0]); + put_pages: for (i = 0; i < num_pages; i++) put_page(pages[i]); - -err_pages: + free_pages: kfree(pages); return ret; } @@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu, int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; - int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); - int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; if (e500_mmu_host_init(vcpu_e500)) - goto err; + goto free_vcpu; vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; @@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; vcpu_e500->gtlb_params[1].sets = 1; - vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE + + KVM_E500_TLB1_SIZE, + sizeof(*vcpu_e500->gtlb_arch), + GFP_KERNEL); if (!vcpu_e500->gtlb_arch) return -ENOMEM; vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[0].entries, + vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries, + sizeof(struct tlbe_ref), GFP_KERNEL); if (!vcpu_e500->gtlb_priv[0]) - goto err; + goto free_vcpu; - vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[1].entries, + vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries, + sizeof(struct tlbe_ref), GFP_KERNEL); if (!vcpu_e500->gtlb_priv[1]) - goto err; + goto free_vcpu; - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * - vcpu_e500->gtlb_params[1].entries, + vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries, + sizeof(*vcpu_e500->g2h_tlb1_map), GFP_KERNEL); if (!vcpu_e500->g2h_tlb1_map) - goto err; + goto free_vcpu; vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; - -err: + free_vcpu: free_gtlb(vcpu_e500); return -1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6ce40dd6fe51..70963c845e96 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/module.h> +#include <linux/irqbypass.h> +#include <linux/kvm_irqfd.h> #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> @@ -436,6 +438,16 @@ err_out: return -EINVAL; } +bool kvm_arch_has_vcpu_debugfs(void) +{ + return false; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + void kvm_arch_destroy_vm(struct kvm *kvm) { unsigned int i; @@ -739,6 +751,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } +/* + * irq_bypass_add_producer and irq_bypass_del_producer are only + * useful if the architecture supports PCI passthrough. + * irq_bypass_stop and irq_bypass_start are not needed and so + * kvm_ops are not defined for them. + */ +bool kvm_arch_has_irq_bypass(void) +{ + return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) || + (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer)); +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_add_producer) + return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod); + + return 0; +} + +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_del_producer) + kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); +} + static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -1167,6 +1215,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_MPIC + if (kvm->arch.mpic) + return true; +#endif +#ifdef CONFIG_KVM_XICS + if (kvm->arch.xics) + return true; +#endif + return false; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 33d9daff5783..fb21990c0fb4 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked, __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) ); +TRACE_EVENT(kvmppc_vcore_wakeup, + TP_PROTO(int do_sleep, __u64 ns), + + TP_ARGS(do_sleep, ns), + + TP_STRUCT__entry( + __field(__u64, ns) + __field(int, waited) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->ns = ns; + __entry->waited = do_sleep; + __entry->tgid = current->tgid; + ), + + TP_printk("%s time %lld ns, tgid=%d", + __entry->waited ? "wait" : "poll", + __entry->ns, __entry->tgid) +); + TRACE_EVENT(kvmppc_run_vcpu_enter, TP_PROTO(struct kvm_vcpu *vcpu), |