diff options
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 533 |
1 files changed, 373 insertions, 160 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2fd5580c8f6e..3686471be32b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,11 +53,15 @@ #include <asm/smp.h> #include <asm/dbell.h> #include <asm/hmi.h> +#include <asm/pnv-pci.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/hugetlb.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> #include <linux/module.h> +#include <linux/compiler.h> #include "book3s.h" @@ -70,6 +74,8 @@ /* Used to indicate that a guest page fault needs to be handled */ #define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) +/* Used to indicate that a guest passthrough interrupt needs to be handled */ +#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2) /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) @@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); + module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); + +/* Factor by which the vcore halt poll interval is grown, default is to double + */ +static unsigned int halt_poll_ns_grow = 2; +module_param(halt_poll_ns_grow, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); + +/* Factor by which the vcore halt poll interval is shrunk, default is to reset + */ +static unsigned int halt_poll_ns_shrink; +module_param(halt_poll_ns_shrink, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, + int *ip) +{ + int i = *ip; + struct kvm_vcpu *vcpu; + + while (++i < MAX_SMT_THREADS) { + vcpu = READ_ONCE(vc->runnable_threads[i]); + if (vcpu) { + *ip = i; + return vcpu; + } + } + return NULL; +} + +/* Used to traverse the list of runnable threads for a given vcore */ +#define for_each_runnable_thread(i, vcpu, vc) \ + for (i = -1; (vcpu = next_runnable_thread(vc, &i)); ) + static bool kvmppc_ipi_thread(int cpu) { /* On POWER8 for IPIs to threads in the same core, use msgsnd */ @@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HV_RM_HARD: + r = RESUME_PASSTHROUGH; + break; default: kvmppc_dump_regs(vcpu); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", @@ -1149,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vcore->vtb); + break; case KVM_REG_PPC_DAWR: *val = get_reg_val(id, vcpu->arch.dawr); break; @@ -1341,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vcore->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_DAWR: vcpu->arch.dawr = set_reg_val(id, *val); break; @@ -1493,7 +1549,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) if (vcore == NULL) return NULL; - INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); init_swait_queue_head(&vcore->wq); @@ -1802,7 +1857,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; spin_unlock_irq(&vcpu->arch.tbacct_lock); --vc->n_runnable; - list_del(&vcpu->arch.run_list); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL); } static int kvmppc_grab_hwthread(int cpu) @@ -2048,66 +2103,6 @@ static void init_master_vcore(struct kvmppc_vcore *vc) vc->conferring_threads = 0; } -/* - * See if the existing subcores can be split into 3 (or fewer) subcores - * of at most two threads each, so we can fit in another vcore. This - * assumes there are at most two subcores and at most 6 threads in total. - */ -static bool can_split_piggybacked_subcores(struct core_info *cip) -{ - int sub, new_sub; - int large_sub = -1; - int thr; - int n_subcores = cip->n_subcores; - struct kvmppc_vcore *vc, *vcnext; - struct kvmppc_vcore *master_vc = NULL; - - for (sub = 0; sub < cip->n_subcores; ++sub) { - if (cip->subcore_threads[sub] <= 2) - continue; - if (large_sub >= 0) - return false; - large_sub = sub; - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - if (vc->num_threads > 2) - return false; - n_subcores += (cip->subcore_threads[sub] - 1) >> 1; - } - if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2)) - return false; - - /* - * Seems feasible, so go through and move vcores to new subcores. - * Note that when we have two or more vcores in one subcore, - * all those vcores must have only one thread each. - */ - new_sub = cip->n_subcores; - thr = 0; - sub = large_sub; - list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) { - if (thr >= 2) { - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]); - /* vc->num_threads must be 1 */ - if (++cip->subcore_threads[new_sub] == 1) { - cip->subcore_vm[new_sub] = vc->kvm; - init_master_vcore(vc); - master_vc = vc; - ++cip->n_subcores; - } else { - vc->master_vcore = master_vc; - ++new_sub; - } - } - thr += vc->num_threads; - } - cip->subcore_threads[large_sub] = 2; - cip->max_subcore_threads = 2; - - return true; -} - static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) { int n_threads = vc->num_threads; @@ -2118,23 +2113,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (n_threads < cip->max_subcore_threads) n_threads = cip->max_subcore_threads; - if (subcore_config_ok(cip->n_subcores + 1, n_threads)) { - cip->max_subcore_threads = n_threads; - } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 && - vc->num_threads <= 2) { - /* - * We may be able to fit another subcore in by - * splitting an existing subcore with 3 or 4 - * threads into two 2-thread subcores, or one - * with 5 or 6 threads into three subcores. - * We can only do this if those subcores have - * piggybacked virtual cores. - */ - if (!can_split_piggybacked_subcores(cip)) - return false; - } else { + if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) return false; - } + cip->max_subcore_threads = n_threads; sub = cip->n_subcores; ++cip->n_subcores; @@ -2148,43 +2129,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) return true; } -static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, - struct core_info *cip, int sub) -{ - struct kvmppc_vcore *vc; - int n_thr; - - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - - /* require same VM and same per-core reg values */ - if (pvc->kvm != vc->kvm || - pvc->tb_offset != vc->tb_offset || - pvc->pcr != vc->pcr || - pvc->lpcr != vc->lpcr) - return false; - - /* P8 guest with > 1 thread per core would see wrong TIR value */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - (vc->num_threads > 1 || pvc->num_threads > 1)) - return false; - - n_thr = cip->subcore_threads[sub] + pvc->num_threads; - if (n_thr > cip->max_subcore_threads) { - if (!subcore_config_ok(cip->n_subcores, n_thr)) - return false; - cip->max_subcore_threads = n_thr; - } - - cip->total_threads += pvc->num_threads; - cip->subcore_threads[sub] = n_thr; - pvc->master_vcore = vc; - list_del(&pvc->preempt_list); - list_add_tail(&pvc->preempt_list, &cip->vcs[sub]); - - return true; -} - /* * Work out whether it is possible to piggyback the execution of * vcore *pvc onto the execution of the other vcores described in *cip. @@ -2192,27 +2136,18 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, int target_threads) { - int sub; - if (cip->total_threads + pvc->num_threads > target_threads) return false; - for (sub = 0; sub < cip->n_subcores; ++sub) - if (cip->subcore_threads[sub] && - can_piggyback_subcore(pvc, cip, sub)) - return true; - - if (can_dynamic_split(pvc, cip)) - return true; - return false; + return can_dynamic_split(pvc, cip); } static void prepare_threads(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + int i; + struct kvm_vcpu *vcpu; - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; else if (vcpu->arch.vpa.update_pending || @@ -2259,15 +2194,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { - int still_running = 0; + int still_running = 0, i; u64 now; long ret; - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; spin_lock(&vc->lock); now = get_tb(); - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { /* cancel pending dec exception if dec is positive */ if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) @@ -2307,8 +2241,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) } if (vc->n_runnable > 0 && vc->runner == NULL) { /* make sure there's a candidate runner awake */ - vcpu = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + vcpu = next_runnable_thread(vc, &i); wake_up(&vcpu->arch.cpu_run); } } @@ -2361,7 +2295,7 @@ static inline void kvmppc_set_host_core(int cpu) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; int i; int srcu_idx; struct core_info core_info; @@ -2397,8 +2331,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); @@ -2477,8 +2410,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) active |= 1 << thr; list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { pvc->pcpu = pcpu + thr; - list_for_each_entry(vcpu, &pvc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, pvc) { kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -2604,34 +2536,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, finish_wait(&vcpu->arch.cpu_run, &wait); } +static void grow_halt_poll_ns(struct kvmppc_vcore *vc) +{ + /* 10us base */ + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) + vc->halt_poll_ns = 10000; + else + vc->halt_poll_ns *= halt_poll_ns_grow; + + if (vc->halt_poll_ns > halt_poll_max_ns) + vc->halt_poll_ns = halt_poll_max_ns; +} + +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) +{ + if (halt_poll_ns_shrink == 0) + vc->halt_poll_ns = 0; + else + vc->halt_poll_ns /= halt_poll_ns_shrink; +} + +/* Check to see if any of the runnable vcpus on the vcore have pending + * exceptions or are no longer ceded + */ +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu; + int i; + + for_each_runnable_thread(i, vcpu, vc) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) + return 1; + } + + return 0; +} + /* * All the vcpus in this vcore are idle, so wait for a decrementer * or external interrupt to one of the vcpus. vc->lock is held. */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + ktime_t cur, start_poll, start_wait; int do_sleep = 1; + u64 block_ns; DECLARE_SWAITQUEUE(wait); - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + /* Poll for pending exceptions and ceded state */ + cur = start_poll = ktime_get(); + if (vc->halt_poll_ns) { + ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns); + ++vc->runner->stat.halt_attempted_poll; - /* - * Check one last time for pending exceptions and ceded state after - * we put ourselves on the wait queue - */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { - do_sleep = 0; - break; + vc->vcore_state = VCORE_POLLING; + spin_unlock(&vc->lock); + + do { + if (kvmppc_vcore_check_block(vc)) { + do_sleep = 0; + break; + } + cur = ktime_get(); + } while (single_task_running() && ktime_before(cur, stop)); + + spin_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; + + if (!do_sleep) { + ++vc->runner->stat.halt_successful_poll; + goto out; } } - if (!do_sleep) { + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); - return; + do_sleep = 0; + /* If we polled, count this as a successful poll */ + if (vc->halt_poll_ns) + ++vc->runner->stat.halt_successful_poll; + goto out; } + start_wait = ktime_get(); + vc->vcore_state = VCORE_SLEEPING; trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); @@ -2640,13 +2630,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); + ++vc->runner->stat.halt_successful_wait; + + cur = ktime_get(); + +out: + block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll); + + /* Attribute wait time */ + if (do_sleep) { + vc->runner->stat.halt_wait_ns += + ktime_to_ns(cur) - ktime_to_ns(start_wait); + /* Attribute failed poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_fail_ns += + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll); + } else { + /* Attribute successful poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_success_ns += + ktime_to_ns(cur) - + ktime_to_ns(start_poll); + } + + /* Adjust poll time */ + if (halt_poll_max_ns) { + if (block_ns <= vc->halt_poll_ns) + ; + /* We slept and blocked for longer than the max halt time */ + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + shrink_halt_poll_ns(vc); + /* We slept and our poll time is too small */ + else if (vc->halt_poll_ns < halt_poll_max_ns && + block_ns < halt_poll_max_ns) + grow_halt_poll_ns(vc); + } else + vc->halt_poll_ns = 0; + + trace_kvmppc_vcore_wakeup(do_sleep, block_ns); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded; + int n_ceded, i; struct kvmppc_vcore *vc; - struct kvm_vcpu *v, *vn; + struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); @@ -2666,7 +2695,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; - list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu); ++vc->n_runnable; /* @@ -2706,8 +2735,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); continue; } - list_for_each_entry_safe(v, vn, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); @@ -2720,7 +2748,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) break; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { + for_each_runnable_thread(i, v, vc) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; else @@ -2759,8 +2787,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { /* Wake up some vcpu to run the core */ - v = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + v = next_runnable_thread(vc, &i); wake_up(&v->arch.cpu_run); } @@ -2818,7 +2846,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - } + } else if (r == RESUME_PASSTHROUGH) + r = kvmppc_xics_rm_complete(vcpu, 0); } while (is_kvmppc_resume_guest(r)); out: @@ -3247,6 +3276,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvmppc_free_vcores(kvm); kvmppc_free_hpt(kvm); + + kvmppc_free_pimap(kvm); } /* We don't need to emulate any privileged instructions or dcbz */ @@ -3282,6 +3313,184 @@ static int kvmppc_core_check_processor_compat_hv(void) return 0; } +#ifdef CONFIG_KVM_XICS + +void kvmppc_free_pimap(struct kvm *kvm) +{ + kfree(kvm->arch.pimap); +} + +static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void) +{ + return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL); +} + +static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_irq_map *irq_map; + struct kvmppc_passthru_irqmap *pimap; + struct irq_chip *chip; + int i; + + if (!kvm_irq_bypass) + return 1; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + pimap = kvm->arch.pimap; + if (pimap == NULL) { + /* First call, allocate structure to hold IRQ map */ + pimap = kvmppc_alloc_pimap(); + if (pimap == NULL) { + mutex_unlock(&kvm->lock); + return -ENOMEM; + } + kvm->arch.pimap = pimap; + } + + /* + * For now, we only support interrupts for which the EOI operation + * is an OPAL call followed by a write to XIRR, since that's + * what our real-mode EOI code does. + */ + chip = irq_data_get_irq_chip(&desc->irq_data); + if (!chip || !is_pnv_opal_msi(chip)) { + pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", + host_irq, guest_gsi); + mutex_unlock(&kvm->lock); + return -ENOENT; + } + + /* + * See if we already have an entry for this guest IRQ number. + * If it's mapped to a hardware IRQ number, that's an error, + * otherwise re-use this entry. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) { + if (pimap->mapped[i].r_hwirq) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + break; + } + } + + if (i == KVMPPC_PIRQ_MAPPED) { + mutex_unlock(&kvm->lock); + return -EAGAIN; /* table is full */ + } + + irq_map = &pimap->mapped[i]; + + irq_map->v_hwirq = guest_gsi; + irq_map->desc = desc; + + /* + * Order the above two stores before the next to serialize with + * the KVM real mode handler. + */ + smp_wmb(); + irq_map->r_hwirq = desc->irq_data.hwirq; + + if (i == pimap->n_mapped) + pimap->n_mapped++; + + kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + + mutex_unlock(&kvm->lock); + + return 0; +} + +static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_passthru_irqmap *pimap; + int i; + + if (!kvm_irq_bypass) + return 0; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + if (kvm->arch.pimap == NULL) { + mutex_unlock(&kvm->lock); + return 0; + } + pimap = kvm->arch.pimap; + + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) + break; + } + + if (i == pimap->n_mapped) { + mutex_unlock(&kvm->lock); + return -ENODEV; + } + + kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); + + /* invalidate the entry */ + pimap->mapped[i].r_hwirq = 0; + + /* + * We don't free this structure even when the count goes to + * zero. The structure is freed when we destroy the VM. + */ + + mutex_unlock(&kvm->lock); + return 0; +} + +static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret = 0; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = prod; + + ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); + + return ret; +} + +static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = NULL; + + /* + * When producer of consumer is unregistered, we change back to + * default external interrupt handling mode - KVM real mode + * will switch back to host. + */ + ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); +} +#endif + static long kvm_arch_vm_ioctl_hv(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3400,6 +3609,10 @@ static struct kvmppc_ops kvm_ops_hv = { .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, .hcall_implemented = kvmppc_hcall_impl_hv, +#ifdef CONFIG_KVM_XICS + .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, + .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, +#endif }; static int kvm_init_subcore_bitmap(void) |