summaryrefslogtreecommitdiff
path: root/arch/s390/kvm/kvm-s390.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r--arch/s390/kvm/kvm-s390.c260
1 files changed, 211 insertions, 49 deletions
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index d8080c27d45b..020502af7dc9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"
+#include "gmap.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
+ struct kvm_userspace_memory_region2 fake_memslot = {
+ .slot = KVM_S390_UCONTROL_MEMSLOT,
+ .guest_phys_addr = 0,
+ .userspace_addr = 0,
+ .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
+ .flags = 0,
+ };
+
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
+ /* one flat fake memslot covering the whole address-space */
+ mutex_lock(&kvm->slots_lock);
+ KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
+ mutex_unlock(&kvm->slots_lock);
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_SIZE_MAX;
@@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu)
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
+static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm *kvm = gmap->private;
+ gfn_t gfn = gpa_to_gfn(gaddr);
+ bool unlocked;
+ hva_t vmaddr;
+ gpa_t tmp;
+ int rc;
+
+ if (kvm_is_ucontrol(kvm)) {
+ tmp = __gmap_translate(gmap, gaddr);
+ gfn = gpa_to_gfn(tmp);
+ }
+
+ vmaddr = gfn_to_hva(kvm, gfn);
+ rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
+ if (!rc)
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+ return rc;
+}
+
+/**
+ * __kvm_s390_mprotect_many() - Apply specified protection to guest pages
+ * @gmap: the gmap of the guest
+ * @gpa: the starting guest address
+ * @npages: how many pages to protect
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: pgste notification bits to set
+ *
+ * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
+ *
+ * Context: kvm->srcu and gmap->mm need to be held in read mode
+ */
+int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
+ unsigned long bits)
+{
+ unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
+ gpa_t end = gpa + npages * PAGE_SIZE;
+ int rc;
+
+ for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ if (rc == -EAGAIN) {
+ __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
+ rc = gmap_protect_one(gmap, gpa, prot, bits);
+ }
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
+{
+ gpa_t gaddr = kvm_s390_get_prefix(vcpu);
+ int idx, rc;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ mmap_read_lock(vcpu->arch.gmap->mm);
+
+ rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
+
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return rc;
+}
+
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
@@ -4513,9 +4595,8 @@ retry:
*/
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
int rc;
- rc = gmap_mprotect_notify(vcpu->arch.gmap,
- kvm_s390_get_prefix(vcpu),
- PAGE_SIZE * 2, PROT_WRITE);
+
+ rc = kvm_s390_mprotect_notify_prefix(vcpu);
if (rc) {
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
return rc;
@@ -4766,11 +4847,112 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
+static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
+{
+ KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
+ "Unexpected program interrupt 0x%x, TEID 0x%016lx",
+ current->thread.gmap_int_code, current->thread.gmap_teid.val);
+}
+
+/*
+ * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
+ * @vcpu: the vCPU whose gmap is to be fixed up
+ * @gfn: the guest frame number used for memslots (including fake memslots)
+ * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
+ * @flags: FOLL_* flags
+ *
+ * Return: 0 on success, < 0 in case of error.
+ * Context: The mm lock must not be held before calling. May sleep.
+ */
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+{
+ struct kvm_memory_slot *slot;
+ unsigned int fault_flags;
+ bool writable, unlocked;
+ unsigned long vmaddr;
+ struct page *page;
+ kvm_pfn_t pfn;
+ int rc;
+
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+ if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+ return vcpu_post_run_addressing_exception(vcpu);
+
+ fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ if (vcpu->arch.gmap->pfault_enabled)
+ flags |= FOLL_NOWAIT;
+ vmaddr = __gfn_to_hva_memslot(slot, gfn);
+
+try_again:
+ pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+
+ /* Access outside memory, inject addressing exception */
+ if (is_noslot_pfn(pfn))
+ return vcpu_post_run_addressing_exception(vcpu);
+ /* Signal pending: try again */
+ if (pfn == KVM_PFN_ERR_SIGPENDING)
+ return -EAGAIN;
+
+ /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */
+ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ if (kvm_arch_setup_async_pf(vcpu))
+ return 0;
+ vcpu->stat.pfault_sync++;
+ /* Could not setup async pfault, try again synchronously */
+ flags &= ~FOLL_NOWAIT;
+ goto try_again;
+ }
+ /* Any other error */
+ if (is_error_pfn(pfn))
+ return -EFAULT;
+
+ /* Success */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
+ rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
+ if (!rc)
+ rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
+ scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
+ kvm_release_faultin_page(vcpu->kvm, page, false, writable);
+ }
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ return rc;
+}
+
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+{
+ unsigned long gaddr_tmp;
+ gfn_t gfn;
+
+ gfn = gpa_to_gfn(gaddr);
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ /*
+ * This translates the per-vCPU guest address into a
+ * fake guest address, which can then be used with the
+ * fake memslots that are identity mapping userspace.
+ * This allows ucontrol VMs to use the normal fault
+ * resolution path, like normal VMs.
+ */
+ mmap_read_lock(vcpu->arch.gmap->mm);
+ gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
+ mmap_read_unlock(vcpu->arch.gmap->mm);
+ if (gaddr_tmp == -EFAULT) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
+ vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
+ return -EREMOTE;
+ }
+ gfn = gpa_to_gfn(gaddr_tmp);
+ }
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+}
+
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
unsigned int flags = 0;
unsigned long gaddr;
- int rc = 0;
+ int rc;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
@@ -4780,23 +4962,9 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case 0:
vcpu->stat.exit_null++;
break;
- case PGM_NON_SECURE_STORAGE_ACCESS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- /*
- * This is normal operation; a page belonging to a protected
- * guest has not been imported yet. Try to import the page into
- * the protected guest.
- */
- if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL)
- send_sig(SIGSEGV, current, 0);
- break;
case PGM_SECURE_STORAGE_ACCESS:
case PGM_SECURE_STORAGE_VIOLATION:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
+ kvm_s390_assert_primary_as(vcpu);
/*
* This can happen after a reboot with asynchronous teardown;
* the new guest (normal or protected) will run on top of the
@@ -4818,6 +4986,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
send_sig(SIGSEGV, current, 0);
}
break;
+ case PGM_NON_SECURE_STORAGE_ACCESS:
+ kvm_s390_assert_primary_as(vcpu);
+ /*
+ * This is normal operation; a page belonging to a protected
+ * guest has not been imported yet. Try to import the page into
+ * the protected guest.
+ */
+ rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+ if (rc == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+ if (rc != -ENXIO)
+ break;
+ flags = FAULT_FLAG_WRITE;
+ fallthrough;
case PGM_PROTECTION:
case PGM_SEGMENT_TRANSLATION:
case PGM_PAGE_TRANSLATION:
@@ -4825,40 +5007,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
- KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
- "Unexpected program interrupt 0x%x, TEID 0x%016lx",
- current->thread.gmap_int_code, current->thread.gmap_teid.val);
- if (vcpu->arch.gmap->pfault_enabled) {
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT);
- if (rc == -EFAULT)
- return vcpu_post_run_addressing_exception(vcpu);
- if (rc == -EAGAIN) {
- trace_kvm_s390_major_guest_pfault(vcpu);
- if (kvm_arch_setup_async_pf(vcpu))
- return 0;
- vcpu->stat.pfault_sync++;
- } else {
- return rc;
- }
- }
- rc = gmap_fault(vcpu->arch.gmap, gaddr, flags);
- if (rc == -EFAULT) {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
- vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
- vcpu->run->s390_ucontrol.pgm_code = 0x10;
- return -EREMOTE;
- }
- return vcpu_post_run_addressing_exception(vcpu);
- }
- break;
+ kvm_s390_assert_primary_as(vcpu);
+ return vcpu_dat_fault_handler(vcpu, gaddr, flags);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0);
break;
}
- return rc;
+ return 0;
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -5737,7 +5894,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(vcpu->arch.gmap, arg, 0);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = vcpu_dat_fault_handler(vcpu, arg, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_ENABLE_CAP:
@@ -5853,7 +6012,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
- if (kvm_is_ucontrol(kvm))
+ if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
return -EINVAL;
/* When we are protected, we should not change the memory slots */
@@ -5905,6 +6064,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int rc = 0;
+ if (kvm_is_ucontrol(kvm))
+ return;
+
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,