summaryrefslogtreecommitdiff
path: root/arch/s390/kvm/kvm-s390.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r--arch/s390/kvm/kvm-s390.c411
1 files changed, 338 insertions, 73 deletions
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bb7134faaebf..d5ad10791c25 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -23,6 +23,7 @@
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
@@ -36,13 +37,16 @@
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
+#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
@@ -340,12 +344,21 @@ static inline int plo_test_bit(unsigned char nr)
" lgr 0,%[function]\n"
/* Parameter registers are ignored for "test bit" */
" plo 0,0,0,0(0)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc)
: [function] "d" (function)
+ : CC_CLOBBER_LIST("0"));
+ return CC_TRANSFORM(cc) == 0;
+}
+
+static __always_inline void pfcr_query(u8 (*query)[16])
+{
+ asm volatile(
+ " lghi 0,0\n"
+ " .insn rsy,0xeb0000000016,0,0,%[query]\n"
+ : [query] "=QS" (*query)
+ :
: "cc", "0");
- return cc == 0;
}
static __always_inline void __sortl_query(u8 (*query)[32])
@@ -429,13 +442,16 @@ static void __init kvm_s390_cpu_feat_init(void)
if (test_facility(151)) /* DFLTCC */
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
- if (MACHINE_HAS_ESOP)
+ if (test_facility(201)) /* PFCR */
+ pfcr_query(&kvm_s390_available_subfunc.pfcr);
+
+ if (machine_has_esop())
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/
- if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+ if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
!test_facility(3) || !nested)
return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
@@ -624,7 +640,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = min_t(unsigned int, num_online_cpus(), r);
break;
case KVM_CAP_S390_COW:
- r = MACHINE_HAS_ESOP;
+ r = machine_has_esop();
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
r = test_facility(129);
@@ -799,6 +815,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
set_kvm_facility(kvm->arch.model.fac_mask, 192);
set_kvm_facility(kvm->arch.model.fac_list, 192);
}
+ if (test_facility(198)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 198);
+ set_kvm_facility(kvm->arch.model.fac_list, 198);
+ }
+ if (test_facility(199)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 199);
+ set_kvm_facility(kvm->arch.model.fac_list, 199);
+ }
r = 0;
} else
r = -EINVAL;
@@ -998,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
- VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+ VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce);
break;
}
@@ -1543,6 +1567,9 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1757,6 +1784,9 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1825,6 +1855,9 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
+ VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
return 0;
}
@@ -1907,11 +1940,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
read_lock(&kvm->arch.sca_lock);
sca = kvm->arch.sca;
+ old = READ_ONCE(sca->utility);
do {
- old = READ_ONCE(sca->utility);
new = old;
new.mtcr = val;
- } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
}
@@ -2641,7 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- r = s390_disable_cow_sharing();
+ mmap_write_lock(kvm->mm);
+ r = gmap_helper_disable_cow_sharing();
+ mmap_write_unlock(kvm->mm);
if (r)
break;
@@ -3365,7 +3400,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
/* we emulate STHYI in kvm */
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74);
- if (MACHINE_HAS_TLB_GUEST) {
+ if (machine_has_tlb_guest()) {
set_kvm_facility(kvm->arch.model.fac_mask, 147);
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
@@ -3398,8 +3433,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -3421,7 +3468,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_gisa_init(kvm);
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
kvm->arch.pv.set_aside = NULL;
- KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+ KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
return 0;
out_err:
@@ -3484,7 +3531,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+ KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}
/* Section: vcpu related */
@@ -3605,7 +3652,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
free_page((unsigned long)old_sca);
- VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+ VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
old_sca, kvm->arch.sca);
return 0;
}
@@ -3719,7 +3766,6 @@ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- gmap_enable(vcpu->arch.enabled_gmap);
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
@@ -3732,8 +3778,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
- vcpu->arch.enabled_gmap = gmap_get_enabled();
- gmap_disable(vcpu->arch.enabled_gmap);
}
@@ -3751,8 +3795,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
}
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
- /* make vcpu_load load the right gmap on the first trigger */
- vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
@@ -3774,6 +3816,13 @@ static bool kvm_has_pckmo_ecc(struct kvm *kvm)
}
+static bool kvm_has_pckmo_hmac(struct kvm *kvm)
+{
+ /* At least one HMAC subfunction must be present */
+ return kvm_has_pckmo_subfunc(kvm, 118) ||
+ kvm_has_pckmo_subfunc(kvm, 122);
+}
+
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
/*
@@ -3786,7 +3835,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
- vcpu->arch.sie_block->ecd &= ~ECD_ECC;
+ vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
@@ -3794,9 +3843,11 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
/* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
- /* ecc is also wrapped with AES key */
+ /* ecc/hmac is also wrapped with AES key */
if (kvm_has_pckmo_ecc(vcpu->kvm))
vcpu->arch.sie_block->ecd |= ECD_ECC;
+ if (kvm_has_pckmo_hmac(vcpu->kvm))
+ vcpu->arch.sie_block->ecd |= ECD_HMAC;
}
if (vcpu->kvm->arch.crypto.dea_kw)
@@ -3845,8 +3896,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
- if (MACHINE_HAS_ESOP)
+ /* pgste_set_pte has special handling for !machine_has_esop() */
+ if (machine_has_esop())
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI;
@@ -3896,8 +3947,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
if (rc)
return rc;
}
- hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+ hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
vcpu->arch.sie_block->hpid = HPID_KVM;
@@ -3978,7 +4029,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto out_free_sie_block;
}
- VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
@@ -4464,6 +4515,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4479,9 +4599,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4579,22 +4698,6 @@ int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clo
return 1;
}
-/**
- * kvm_arch_fault_in_page - fault-in guest page if necessary
- * @vcpu: The corresponding virtual cpu
- * @gpa: Guest physical address
- * @writable: Whether the page should be writable or not
- *
- * Make sure that a guest page has been faulted-in on the host.
- *
- * Return: Zero on success, negative error code otherwise.
- */
-long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
-{
- return gmap_fault(vcpu->arch.gmap, gpa,
- writable ? FAULT_FLAG_WRITE : 0);
-}
-
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
{
@@ -4662,12 +4765,11 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.gmap->pfault_enabled)
return false;
- hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
- hva += current->thread.gmap_addr & ~PAGE_MASK;
+ hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return false;
- return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -4705,6 +4807,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
vcpu->arch.sie_block->icptcode = 0;
+ current->thread.gmap_int_code = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
@@ -4712,7 +4815,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
return 0;
}
-static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info = {
.code = PGM_ADDRESSING,
@@ -4748,10 +4851,182 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not setup async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
+static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
+{
+ unsigned int flags = 0;
+ unsigned long gaddr;
+ int rc;
+
+ gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
+ if (kvm_s390_cur_gmap_fault_is_write())
+ flags = FAULT_FLAG_WRITE;
+
+ switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
+ case 0:
+ vcpu->stat.exit_null++;
+ break;
+ case PGM_SECURE_STORAGE_ACCESS:
+ case PGM_SECURE_STORAGE_VIOLATION:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This can happen after a reboot with asynchronous teardown;
+ * the new guest (normal or protected) will run on top of the
+ * previous protected guest. The old pages need to be destroyed
+ * so the new guest can use them.
+ */
+ if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
+ /*
+ * Either KVM messed up the secure guest mapping or the
+ * same page is mapped into multiple secure guests.
+ *
+ * This exception is only triggered when a guest 2 is
+ * running and can therefore never occur in kernel
+ * context.
+ */
+ pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
+ current->thread.gmap_int_code, current->comm,
+ current->pid);
+ send_sig(SIGSEGV, current, 0);
+ }
+ break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
+ if (rc == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ if (rc != -ENXIO)
+ break;
+ flags = FAULT_FLAG_WRITE;
+ fallthrough;
+ case PGM_PROTECTION:
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_ASCE_TYPE:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
+ default:
+ KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ send_sig(SIGSEGV, current, 0);
+ break;
+ }
+ return 0;
+}
+
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
struct mcck_volatile_info *mcck_info;
struct sie_page *sie_page;
+ int rc;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
@@ -4773,7 +5048,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
}
if (vcpu->arch.sie_block->icptcode > 0) {
- int rc = kvm_handle_sie_intercept(vcpu);
+ rc = kvm_handle_sie_intercept(vcpu);
if (rc != -EOPNOTSUPP)
return rc;
@@ -4782,24 +5057,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
return -EREMOTE;
- } else if (exit_reason != -EFAULT) {
- vcpu->stat.exit_null++;
- return 0;
- } else if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code =
- current->thread.gmap_addr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- } else if (current->thread.gmap_pfault) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- current->thread.gmap_pfault = 0;
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
- return vcpu_post_run_fault_in_sie(vcpu);
+
+ return vcpu_post_run_handle_fault(vcpu);
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
@@ -4835,7 +5095,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs,
- gmap_get_enabled()->asce);
+ vcpu->arch.gmap->asce);
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@@ -4920,7 +5180,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (current->thread.gs_cb) {
@@ -4986,7 +5246,7 @@ static void store_regs_fmt2(struct kvm_vcpu *vcpu)
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
- if (MACHINE_HAS_GS) {
+ if (cpu_has_gs()) {
preempt_disable();
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
if (vcpu->arch.gs_enabled)
@@ -5638,7 +5898,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5754,7 +6016,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5806,6 +6068,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,