summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s.c5
-rw-r--r--arch/powerpc/kvm/book3s.h3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c12
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv.c126
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c101
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S42
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c61
-rw-r--r--arch/powerpc/kvm/book3s_xics.c6
-rw-r--r--arch/powerpc/kvm/book3s_xive.c74
-rw-r--r--arch/powerpc/kvm/book3s_xive.h11
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c24
-rw-r--r--arch/powerpc/kvm/booke.c5
18 files changed, 299 insertions, 206 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index e45644657d49..ff581d70f20c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER
config KVM_BOOK3S_64_HANDLER
bool
select KVM_BOOK3S_HANDLER
- select PPC_DAWR_FORCE_ENABLE
config KVM_BOOK3S_PR_POSSIBLE
bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 79833f78d1da..b785f6772391 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_ICOUNTER(VM, num_2M_pages),
STATS_DESC_ICOUNTER(VM, num_1G_pages)
};
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
- sizeof(struct kvm_vm_stat) / sizeof(u64));
const struct kvm_stats_header kvm_vm_stats_header = {
.name_size = KVM_STATS_NAME_SIZE,
@@ -71,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
STATS_DESC_COUNTER(VCPU, dec_exits),
STATS_DESC_COUNTER(VCPU, ext_intr_exits),
- STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
STATS_DESC_COUNTER(VCPU, halt_successful_wait),
STATS_DESC_COUNTER(VCPU, dbell_exits),
STATS_DESC_COUNTER(VCPU, gdbell_exits),
@@ -88,8 +85,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, pthru_host),
STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
};
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
- sizeof(struct kvm_vcpu_stat) / sizeof(u64));
const struct kvm_stats_header kvm_vcpu_stats_header = {
.name_size = KVM_STATS_NAME_SIZE,
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 740e51def5a5..58391b4b32ed 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -23,7 +23,8 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
int sprn, ulong *spr_val);
extern int kvmppc_book3s_init_pr(void);
-extern void kvmppc_book3s_exit_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 26b8b27a3755..feee40cb2ba1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -196,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
- u64 v, r;
+ u64 r;
u64 v_val, v_mask;
u64 eaddr_mask;
int i;
@@ -285,7 +285,6 @@ do_second:
goto do_second;
}
- v = be64_to_cpu(pteg[i]);
r = be64_to_cpu(pteg[i+1]);
pp = (r & HPTE_R_PP) | key;
if (r & HPTE_R_PP0)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b5905ae4377c..16359525a40f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -44,6 +44,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
(to != NULL) ? __pa(to): 0,
(from != NULL) ? __pa(from): 0, n);
+ if (eaddr & (0xFFFUL << 52))
+ return ret;
+
quadrant = 1;
if (!pid)
quadrant = 2;
@@ -65,10 +68,12 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
}
isync();
+ pagefault_disable();
if (is_load)
- ret = copy_from_user_nofault(to, (const void __user *)from, n);
+ ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
else
- ret = copy_to_user_nofault((void __user *)to, from, n);
+ ret = __copy_to_user_inatomic((void __user *)to, from, n);
+ pagefault_enable();
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
@@ -81,7 +86,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
return ret;
}
-EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);
static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
void *to, void *from, unsigned long n)
@@ -117,14 +121,12 @@ long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
return ret;
}
-EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
unsigned long n)
{
return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
}
-EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 root,
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 8da93fdfa59e..6365087f3160 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
- memslot = search_memslots(kvm_memslots(kvm), gfn);
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
if (!memslot)
return -EINVAL;
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index dc6591548f0c..870b7f0c7ea5 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm,
unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
- memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (!memslot)
return -EINVAL;
@@ -173,10 +173,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
/*
- * page must not be NULL in real mode,
- * kvmppc_rm_ioba_validate() must have taken care of this.
+ * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is
+ * being cleared, otherwise it returns H_TOO_HARD and we skip this.
*/
- WARN_ON_ONCE_RM(!page);
+ if (!page) {
+ WARN_ON_ONCE_RM(tce != 0);
+ return;
+ }
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 085fb8ecbf68..2acb1c96cfaf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -59,6 +59,7 @@
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
+#include <asm/pmc.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
@@ -1165,7 +1166,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
#endif
case H_RANDOM:
- if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
+ if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4]))
ret = H_HARDWARE;
break;
case H_RPT_INVALIDATE:
@@ -1679,6 +1680,21 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
}
break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ if (r != -1)
+ break;
+ fallthrough; /* go to facility unavailable handler */
+#endif
+
/*
* This occurs if the guest (kernel or userspace), does something that
* is prohibited by HFSCR.
@@ -1697,18 +1713,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
break;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- case BOOK3S_INTERRUPT_HV_SOFTPATCH:
- /*
- * This occurs for various TM-related instructions that
- * we need to emulate on POWER9 DD2.2. We have already
- * handled the cases where the guest was in real-suspend
- * mode and was transitioning to transactional state.
- */
- r = kvmhv_p9_tm_emulation(vcpu);
- break;
-#endif
-
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
break;
@@ -1727,6 +1731,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
int r;
int srcu_idx;
@@ -1811,9 +1816,41 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
* mode and was transitioning to transactional state.
*/
r = kvmhv_p9_tm_emulation(vcpu);
- break;
+ if (r != -1)
+ break;
+ fallthrough; /* go to facility unavailable handler */
#endif
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+ u64 cause = vcpu->arch.hfscr >> 56;
+
+ /*
+ * Only pass HFU interrupts to the L1 if the facility is
+ * permitted but disabled by the L1's HFSCR, otherwise
+ * the interrupt does not make sense to the L1 so turn
+ * it into a HEAI.
+ */
+ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
+ (nested->hfscr & (1UL << cause))) {
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+
+ /*
+ * If the fetch failed, return to guest and
+ * try executing it again.
+ */
+ r = kvmppc_get_last_inst(vcpu, INST_GENERIC,
+ &vcpu->arch.emul_inst);
+ if (r != EMULATE_DONE)
+ r = RESUME_GUEST;
+ else
+ r = RESUME_HOST;
+ } else {
+ r = RESUME_HOST;
+ }
+
+ break;
+ }
+
case BOOK3S_INTERRUPT_HV_RM_HARD:
vcpu->arch.trap = 0;
r = RESUME_GUEST;
@@ -2684,6 +2721,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
spin_lock_init(&vcpu->arch.vpa_update_lock);
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
+ vcpu->arch.shregs.msr = MSR_ME;
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
/*
@@ -2705,6 +2743,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
+ vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -3727,7 +3767,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3754,7 +3793,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@ -3786,7 +3824,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_os_sprs->dscr);
@@ -3852,6 +3889,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ if (vcpu->arch.vpa.pinned_addr) {
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+ } else {
+ get_lppaca()->pmcregs_in_use = 1;
+ }
+ barrier();
+ }
+#endif
kvmhv_load_guest_pmu(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
@@ -3986,6 +4035,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+ barrier();
+ }
+#endif
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
@@ -4146,19 +4202,31 @@ out:
/* Attribute wait time */
if (do_sleep) {
- vc->runner->stat.halt_wait_ns +=
+ vc->runner->stat.generic.halt_wait_ns +=
ktime_to_ns(cur) - ktime_to_ns(start_wait);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_wait_hist,
+ ktime_to_ns(cur) - ktime_to_ns(start_wait));
/* Attribute failed poll time */
- if (vc->halt_poll_ns)
+ if (vc->halt_poll_ns) {
vc->runner->stat.generic.halt_poll_fail_ns +=
ktime_to_ns(start_wait) -
ktime_to_ns(start_poll);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_poll_fail_hist,
+ ktime_to_ns(start_wait) -
+ ktime_to_ns(start_poll));
+ }
} else {
/* Attribute successful poll time */
- if (vc->halt_poll_ns)
+ if (vc->halt_poll_ns) {
vc->runner->stat.generic.halt_poll_success_ns +=
ktime_to_ns(cur) -
ktime_to_ns(start_poll);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_poll_success_hist,
+ ktime_to_ns(cur) - ktime_to_ns(start_poll));
+ }
}
/* Adjust poll time */
@@ -5328,6 +5396,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
struct kvmppc_passthru_irqmap *pimap;
struct irq_chip *chip;
int i, rc = 0;
+ struct irq_data *host_data;
if (!kvm_irq_bypass)
return 1;
@@ -5355,7 +5424,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
* what our real-mode EOI code does, or a XIVE interrupt
*/
chip = irq_data_get_irq_chip(&desc->irq_data);
- if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
+ if (!chip || !is_pnv_opal_msi(chip)) {
pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
host_irq, guest_gsi);
mutex_unlock(&kvm->lock);
@@ -5392,15 +5461,22 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
* the KVM real mode handler.
*/
smp_wmb();
- irq_map->r_hwirq = desc->irq_data.hwirq;
+
+ /*
+ * The 'host_irq' number is mapped in the PCI-MSI domain but
+ * the underlying calls, which will EOI the interrupt in real
+ * mode, need an HW IRQ number mapped in the XICS IRQ domain.
+ */
+ host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq);
+ irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
if (i == pimap->n_mapped)
pimap->n_mapped++;
if (xics_on_xive())
- rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
+ rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq);
else
- kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
+ kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq);
if (rc)
irq_map->r_hwirq = 0;
@@ -5439,7 +5515,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
}
if (xics_on_xive())
- rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
+ rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq);
else
kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index be8ef1c5b1bf..fcf4760a3a0e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -137,23 +137,23 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
* exist in the system. We use a counter of VMs to track this.
*
* One of the operations we need to block is onlining of secondaries, so we
- * protect hv_vm_count with get/put_online_cpus().
+ * protect hv_vm_count with cpus_read_lock/unlock().
*/
static atomic_t hv_vm_count;
void kvm_hv_vm_activated(void)
{
- get_online_cpus();
+ cpus_read_lock();
atomic_inc(&hv_vm_count);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
void kvm_hv_vm_deactivated(void)
{
- get_online_cpus();
+ cpus_read_lock();
atomic_dec(&hv_vm_count);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 898f942eb198..ed8a2c9f5629 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -99,13 +99,12 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->dawrx1 = swab64(hr->dawrx1);
}
-static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
hr->dpdes = vc->dpdes;
- hr->hfscr = vcpu->arch.hfscr;
hr->purr = vcpu->arch.purr;
hr->spurr = vcpu->arch.spurr;
hr->ic = vcpu->arch.ic;
@@ -119,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
hr->pidr = vcpu->arch.pid;
hr->cfar = vcpu->arch.cfar;
hr->ppr = vcpu->arch.ppr;
- switch (trap) {
+ switch (vcpu->arch.trap) {
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
hr->hdar = vcpu->arch.fault_dar;
hr->hdsisr = vcpu->arch.fault_dsisr;
@@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
case BOOK3S_INTERRUPT_H_INST_STORAGE:
hr->asdr = vcpu->arch.fault_gpa;
break;
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
+ (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
+ break;
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
hr->heir = vcpu->arch.emul_inst;
break;
}
}
-/*
- * This can result in some L0 HV register state being leaked to an L1
- * hypervisor when the hv_guest_state is copied back to the guest after
- * being modified here.
- *
- * There is no known problem with such a leak, and in many cases these
- * register settings could be derived by the guest by observing behaviour
- * and timing, interrupts, etc., but it is an issue to consider.
- */
-static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
-{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- u64 mask;
-
- /*
- * Don't let L1 change LPCR bits for the L2 except these:
- */
- mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
- LPCR_LPES | LPCR_MER;
-
- /*
- * Additional filtering is required depending on hardware
- * and configuration.
- */
- hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
- (vc->lpcr & ~mask) | (hr->lpcr & mask));
-
- /*
- * Don't let L1 enable features for L2 which we've disabled for L1,
- * but preserve the interrupt cause field.
- */
- hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
-
- /* Don't let data address watchpoint match in hypervisor state */
- hr->dawrx0 &= ~DAWRX_HYP;
- hr->dawrx1 &= ~DAWRX_HYP;
-
- /* Don't let completed instruction address breakpt match in HV state */
- if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
- hr->ciabr &= ~CIABR_PRIV;
-}
-
-static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
sizeof(struct pt_regs));
}
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+ const struct hv_guest_state *l2_hv,
+ const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 mask;
+
+ restore_hv_regs(vcpu, l2_hv);
+
+ /*
+ * Don't let L1 change LPCR bits for the L2 except these:
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+ LPCR_LPES | LPCR_MER;
+
+ /*
+ * Additional filtering is required depending on hardware
+ * and configuration.
+ */
+ *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+ (vc->lpcr & ~mask) | (*lpcr & mask));
+
+ /*
+ * Don't let L1 enable features for L2 which we don't allow for L1,
+ * but preserve the interrupt cause field.
+ */
+ vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
+
+ /* Don't let data address watchpoint match in hypervisor state */
+ vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
+ vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
+
+ /* Don't let completed instruction address breakpt match in HV state */
+ if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
+}
+
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
long int err, r;
@@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
struct hv_guest_state l2_hv = {0}, saved_l1_hv;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 hv_ptr, regs_ptr;
- u64 hdec_exp;
+ u64 hdec_exp, lpcr;
s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
if (vcpu->kvm->arch.l1_ptcr == 0)
@@ -364,13 +362,14 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+ l2->hfscr = l2_hv.hfscr;
vcpu->arch.regs = l2_regs;
/* Guest must always run with ME enabled, HV disabled. */
vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
- sanitise_hv_regs(vcpu, &l2_hv);
- restore_hv_regs(vcpu, &l2_hv);
+ lpcr = l2_hv.lpcr;
+ load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
@@ -380,7 +379,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
r = RESUME_HOST;
break;
}
- r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
+ r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
} while (is_kvmppc_resume_guest(r));
/* save L2 state for return */
@@ -390,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
delta_ic = vcpu->arch.ic - l2_hv.ic;
delta_vtb = vc->vtb - l2_hv.vtb;
- save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+ save_hv_return_state(vcpu, &l2_hv);
/* restore L1 state */
vcpu->arch.nested = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 0a11ec88a0ae..587c33fc4564 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -706,6 +706,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
icp->rm_eoied_irq = irq;
}
+ /* Handle passthrough interrupts */
if (state->host_irq) {
++vcpu->stat.pthru_all;
if (state->intr_cpu != -1) {
@@ -759,12 +760,12 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
static unsigned long eoi_rc;
-static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
+static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again)
{
void __iomem *xics_phys;
int64_t rc;
- rc = pnv_opal_pci_msi_eoi(c, hwirq);
+ rc = pnv_opal_pci_msi_eoi(d);
if (rc)
eoi_rc = rc;
@@ -872,8 +873,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
icp_rm_deliver_irq(xics, icp, irq, false);
/* EOI the interrupt */
- icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
- again);
+ icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again);
if (check_too_hard(xics, icp) == H_TOO_HARD)
return 2;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8dd437d7a2c6..75079397c2a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* For softpatch interrupt, go off and do TM instruction emulation */
- cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
- beq kvmppc_tm_emul
-#endif
-
/* See if this is a leftover HDEC interrupt */
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
@@ -1599,42 +1593,6 @@ maybe_reenter_guest:
blt deliver_guest_interrupt
b guest_exit_cont
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Softpatch interrupt for transactional memory emulation cases
- * on POWER9 DD2.2. This is early in the guest exit path - we
- * haven't saved registers or done a treclaim yet.
- */
-kvmppc_tm_emul:
- /* Save instruction image in HEIR */
- mfspr r3, SPRN_HEIR
- stw r3, VCPU_HEIR(r9)
-
- /*
- * The cases we want to handle here are those where the guest
- * is in real suspend mode and is trying to transition to
- * transactional mode.
- */
- lbz r0, HSTATE_FAKE_SUSPEND(r13)
- cmpwi r0, 0 /* keep exiting guest if in fake suspend */
- bne guest_exit_cont
- rldicl r3, r11, 64 - MSR_TS_S_LG, 62
- cmpwi r3, 1 /* or if not in suspend state */
- bne guest_exit_cont
-
- /* Call C code to do the emulation */
- mr r3, r9
- bl kvmhv_p9_tm_emulation_early
- nop
- ld r9, HSTATE_KVM_VCPU(r13)
- li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
- cmpwi r3, 0
- beq guest_exit_cont /* continue exiting if not handled */
- ld r10, VCPU_PC(r9)
- ld r11, VCPU_MSR(r9)
- b fast_interrupt_c_return /* go back to guest if handled */
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index cc90b8b82329..866cadd70094 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -47,6 +47,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
int ra, rs;
/*
+ * The TM softpatch interrupt sets NIP to the instruction following
+ * the faulting instruction, which is not executed. Rewind nip to the
+ * faulting instruction so it looks like a normal synchronous
+ * interrupt, then update nip in the places where the instruction is
+ * emulated.
+ */
+ vcpu->arch.regs.nip -= 4;
+
+ /*
* rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
* in these instructions, so masking bit 31 out doesn't change these
* instructions. For treclaim., tsr., and trechkpt. instructions if bit
@@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
(newmsr & MSR_TM)));
newmsr = sanitize_msr(newmsr);
vcpu->arch.shregs.msr = newmsr;
- vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+ vcpu->arch.cfar = vcpu->arch.regs.nip;
vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
return RESUME_GUEST;
@@ -79,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
}
/* check EBB facility is available */
if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
- /* generate an illegal instruction interrupt */
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- return RESUME_GUEST;
+ vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+ vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56;
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+ return -1; /* rerun host interrupt handler */
}
if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
/* generate a facility unavailable interrupt */
- vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
- ((u64)FSCR_EBB_LG << 56);
+ vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+ vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56;
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
return RESUME_GUEST;
}
@@ -100,7 +110,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.bescr = bescr;
msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
vcpu->arch.shregs.msr = msr;
- vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+ vcpu->arch.cfar = vcpu->arch.regs.nip;
vcpu->arch.regs.nip = vcpu->arch.ebbrr;
return RESUME_GUEST;
@@ -116,6 +126,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
newmsr = sanitize_msr(newmsr);
vcpu->arch.shregs.msr = newmsr;
+ vcpu->arch.regs.nip += 4;
return RESUME_GUEST;
/* ignore bit 31, see comment above */
@@ -128,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
}
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
- /* generate an illegal instruction interrupt */
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- return RESUME_GUEST;
+ vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+ vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+ return -1; /* rerun host interrupt handler */
}
if (!(msr & MSR_TM)) {
/* generate a facility unavailable interrupt */
- vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
- ((u64)FSCR_TM_LG << 56);
+ vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+ vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_FAC_UNAVAIL);
return RESUME_GUEST;
@@ -152,20 +164,22 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
}
vcpu->arch.shregs.msr = msr;
+ vcpu->arch.regs.nip += 4;
return RESUME_GUEST;
/* ignore bit 31, see comment above */
case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
- /* generate an illegal instruction interrupt */
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- return RESUME_GUEST;
+ vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+ vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+ return -1; /* rerun host interrupt handler */
}
if (!(msr & MSR_TM)) {
/* generate a facility unavailable interrupt */
- vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
- ((u64)FSCR_TM_LG << 56);
+ vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+ vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_FAC_UNAVAIL);
return RESUME_GUEST;
@@ -189,6 +203,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+ vcpu->arch.regs.nip += 4;
return RESUME_GUEST;
/* ignore bit 31, see comment above */
@@ -196,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
/* XXX do we need to check for PR=0 here? */
/* check for TM disabled in the HFSCR or MSR */
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
- /* generate an illegal instruction interrupt */
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- return RESUME_GUEST;
+ vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+ vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+ vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+ return -1; /* rerun host interrupt handler */
}
if (!(msr & MSR_TM)) {
/* generate a facility unavailable interrupt */
- vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
- ((u64)FSCR_TM_LG << 56);
+ vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+ vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
kvmppc_book3s_queue_irqprio(vcpu,
BOOK3S_INTERRUPT_FAC_UNAVAIL);
return RESUME_GUEST;
@@ -220,6 +236,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
vcpu->arch.shregs.msr = msr | MSR_TS_S;
+ vcpu->arch.regs.nip += 4;
return RESUME_GUEST;
}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 303e3cb096db..ebd5d920de8c 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -10,13 +10,13 @@
#include <linux/gfp.h>
#include <linux/anon_inodes.h>
#include <linux/spinlock.h>
-
+#include <linux/debugfs.h>
#include <linux/uaccess.h>
+
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
-#include <asm/debugfs.h>
#include <asm/time.h>
#include <linux/seq_file.h>
@@ -1024,7 +1024,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
return;
}
- xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+ xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir,
xics, &xics_debug_fops);
pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 8cfab3547494..a18db9e16ea4 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -22,7 +22,6 @@
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
-#include <asm/debugfs.h>
#include <asm/time.h>
#include <asm/opal.h>
@@ -59,6 +58,25 @@
*/
#define XIVE_Q_GAP 2
+static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ /* Check enablement at VP level */
+ return xc->vp_cam & TM_QW1W2_HO;
+}
+
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = xc->xive;
+
+ if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE)
+ return kvmppc_xive_vcpu_has_save_restore(vcpu);
+
+ return true;
+}
+
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
@@ -77,7 +95,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
return;
eieio();
- __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
@@ -149,7 +168,8 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
/* First load to pull the context, we ignore the value */
__raw_readl(tima + TM_SPC_PULL_OS_CTX);
/* Second load to recover the context state (Words 0 and 1) */
- vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
/* Fixup some of the state for the next load */
vcpu->arch.xive_saved_state.lsmfb = 0;
@@ -363,9 +383,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
- if (rc == 0 && !xive->single_escalation)
+ if (rc == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(vcpu, prio,
- xive->single_escalation);
+ kvmppc_xive_has_single_escalation(xive));
if (rc)
return rc;
}
@@ -922,13 +942,13 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
}
int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
- struct irq_desc *host_desc)
+ unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
- struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
- unsigned int host_irq = irq_desc_get_irq(host_desc);
+ struct irq_data *host_data =
+ irq_domain_get_irq_data(irq_get_default_host(), host_irq);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
u16 idx;
u8 prio;
@@ -937,7 +957,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
if (!xive)
return -ENODEV;
- pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq);
+ pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
+ __func__, guest_irq, host_irq, hw_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
@@ -959,7 +980,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
*/
rc = irq_set_vcpu_affinity(host_irq, state);
if (rc) {
- pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
+ pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
@@ -1019,12 +1040,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
- struct irq_desc *host_desc)
+ unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
- unsigned int host_irq = irq_desc_get_irq(host_desc);
u16 idx;
u8 prio;
int rc;
@@ -1032,7 +1052,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
if (!xive)
return -ENODEV;
- pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
+ pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
@@ -1059,7 +1079,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Release the passed-through interrupt to the host */
rc = irq_set_vcpu_affinity(host_irq, NULL);
if (rc) {
- pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
+ pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
@@ -1199,7 +1219,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
if (xc->esc_virq[i]) {
- if (xc->xive->single_escalation)
+ if (kvmppc_xive_has_single_escalation(xc->xive))
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
@@ -1319,6 +1339,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
+ if (!kvmppc_xive_check_save_restore(vcpu)) {
+ pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
+ r = -EIO;
+ goto bail;
+ }
+
/* Configure VCPU fields for use by assembly push/pull */
vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
@@ -1340,7 +1366,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
* Enable the VP first as the single escalation mode will
* affect escalation interrupts numbering
*/
- r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
if (r) {
pr_err("Failed to enable VP in OPAL, err %d\n", r);
goto bail;
@@ -1357,15 +1383,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct xive_q *q = &xc->queues[i];
/* Single escalation, no queue 7 */
- if (i == 7 && xive->single_escalation)
+ if (i == 7 && kvmppc_xive_has_single_escalation(xive))
break;
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
- if (r == 0 && !xive->single_escalation)
+ if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(
- vcpu, i, xive->single_escalation);
+ vcpu, i, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
} else {
@@ -1380,7 +1406,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
/* If not done above, attach priority 0 escalation */
- r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
+ r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
@@ -2135,7 +2161,11 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
*/
xive->nr_servers = KVM_MAX_VCPUS;
- xive->single_escalation = xive_native_has_single_escalation();
+ if (xive_native_has_single_escalation())
+ xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+ if (xive_native_has_save_restore())
+ xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
kvm->arch.xive = xive;
return 0;
@@ -2329,7 +2359,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive)
return;
}
- xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir,
xive, &xive_debug_fops);
pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index afe9eeac6d56..e6a9651c6f1e 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -97,6 +97,9 @@ struct kvmppc_xive_ops {
int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq);
};
+#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1
+#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2
+
struct kvmppc_xive {
struct kvm *kvm;
struct kvm_device *dev;
@@ -133,7 +136,7 @@ struct kvmppc_xive {
u32 q_page_order;
/* Flags */
- u8 single_escalation;
+ u8 flags;
/* Number of entries in the VP block */
u32 nr_servers;
@@ -307,6 +310,12 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
struct kvmppc_xive_vcpu *xc, int irq);
int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
+
+static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive)
+{
+ return xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+}
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 573ecaab3597..99db9ac49901 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -20,7 +20,6 @@
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
-#include <asm/debugfs.h>
#include <asm/opal.h>
#include <linux/debugfs.h>
@@ -93,7 +92,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
- if (xc->xive->single_escalation)
+ if (kvmppc_xive_has_single_escalation(xc->xive))
xive_cleanup_single_escalation(vcpu, xc,
xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
@@ -168,11 +167,17 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
goto bail;
}
+ if (!kvmppc_xive_check_save_restore(vcpu)) {
+ pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
+ rc = -EIO;
+ goto bail;
+ }
+
/*
* Enable the VP first as the single escalation mode will
* affect escalation interrupts numbering
*/
- rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
if (rc) {
pr_err("Failed to enable VP in OPAL: %d\n", rc);
goto bail;
@@ -693,7 +698,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
}
rc = kvmppc_xive_attach_escalation(vcpu, priority,
- xive->single_escalation);
+ kvmppc_xive_has_single_escalation(xive));
error:
if (rc)
kvmppc_xive_native_cleanup_queue(vcpu, priority);
@@ -820,7 +825,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive)
for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
/* Single escalation, no queue 7 */
- if (prio == 7 && xive->single_escalation)
+ if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
break;
if (xc->esc_virq[prio]) {
@@ -1111,7 +1116,12 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
*/
xive->nr_servers = KVM_MAX_VCPUS;
- xive->single_escalation = xive_native_has_single_escalation();
+ if (xive_native_has_single_escalation())
+ xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+ if (xive_native_has_save_restore())
+ xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
xive->ops = &kvmppc_xive_native_ops;
kvm->arch.xive = xive;
@@ -1257,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive)
return;
}
- xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+ xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir,
xive, &xive_native_debug_fops);
pr_debug("%s: created %s\n", __func__, name);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 551b30d84aee..977801c83aff 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_ICOUNTER(VM, num_2M_pages),
STATS_DESC_ICOUNTER(VM, num_1G_pages)
};
-static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
- sizeof(struct kvm_vm_stat) / sizeof(u64));
const struct kvm_stats_header kvm_vm_stats_header = {
.name_size = KVM_STATS_NAME_SIZE,
@@ -69,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
STATS_DESC_COUNTER(VCPU, dec_exits),
STATS_DESC_COUNTER(VCPU, ext_intr_exits),
- STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
STATS_DESC_COUNTER(VCPU, halt_successful_wait),
STATS_DESC_COUNTER(VCPU, dbell_exits),
STATS_DESC_COUNTER(VCPU, gdbell_exits),
@@ -79,8 +76,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, pthru_host),
STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
};
-static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
- sizeof(struct kvm_vcpu_stat) / sizeof(u64));
const struct kvm_stats_header kvm_vcpu_stats_header = {
.name_size = KVM_STATS_NAME_SIZE,