diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-02-15 20:35:26 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-02-15 20:35:26 +0300 |
commit | e4922088f8e90ea163281ba8e69b98f34a7a1b83 (patch) | |
tree | 1db574412a5365f285754dd0364d249581535ad8 /arch/s390/kvm | |
parent | 33436335e93a1788a58443fc99c5ab320ce4b9d9 (diff) | |
parent | 5fc5b94a273655128159186c87662105db8afeb5 (diff) | |
download | linux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.tar.xz |
Merge tag 'kvm-s390-next-6.3-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* Two more V!=R patches
* The last part of the cmpxchg patches
* A few fixes
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r-- | arch/s390/kvm/gaccess.c | 109 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.h | 3 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 11 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 264 |
4 files changed, 285 insertions, 102 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..3eb85f254881 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1162,6 +1162,115 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, } /** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_addr: Pointer to old value. If the location at @gpa contains this value, + * the exchange will succeed. After calling cmpxchg_guest_abs_with_key() + * *@old_addr contains the value at @gpa before the attempt to + * exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * @success: output value indicating if an exchange occurred. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. + * + * Return: * 0: successful exchange + * * >0: a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + * * -EAGAIN: transient failure (len 1 or 2) + * * -EOPNOTSUPP: read-only memslot (should never occur) + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + __uint128_t *old_addr, __uint128_t new, + u8 access_key, bool *success) +{ + gfn_t gfn = gpa_to_gfn(gpa); + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a read-only memslot, even though that cannot occur + * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + /* + * The cmpxchg_user_key macro depends on the type of "old", so we need + * a case for each valid length and get some code duplication as long + * as we don't introduce a new macro. + */ + switch (len) { + case 1: { + u8 old; + + ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 2: { + u16 old; + + ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 4: { + u32 old; + + ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 8: { + u64 old; + + ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 16: { + __uint128_t old; + + ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + default: + return -EINVAL; + } + if (*success) + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by protection, either key protection + * or user page write protection. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + +/** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu * @gva: Guest virtual address diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 9408d6cc8e2c..b320d12aa049 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode); +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old, + __uint128_t new, u8 access_key, bool *success); + /** * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 8edb3bee74b6..9250fde1f97d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3103,9 +3103,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) static void process_gib_alert_list(void) { struct kvm_s390_gisa_interrupt *gi; + u32 final, gisa_phys, origin = 0UL; struct kvm_s390_gisa *gisa; struct kvm *kvm; - u32 final, origin = 0UL; do { /* @@ -3131,9 +3131,10 @@ static void process_gib_alert_list(void) * interruptions asap. */ while (origin & GISA_ADDR_MASK) { - gisa = (struct kvm_s390_gisa *)(u64)origin; + gisa_phys = origin; + gisa = phys_to_virt(gisa_phys); origin = gisa->next_alert; - gisa->next_alert = (u32)(u64)gisa; + gisa->next_alert = gisa_phys; kvm = container_of(gisa, struct sie_page2, gisa)->kvm; gi = &kvm->arch.gisa_int; if (hrtimer_active(&gi->timer)) @@ -3417,6 +3418,7 @@ void kvm_s390_gib_destroy(void) int __init kvm_s390_gib_init(u8 nisc) { + u32 gib_origin; int rc = 0; if (!css_general_characteristics.aiv) { @@ -3438,7 +3440,8 @@ int __init kvm_s390_gib_init(u8 nisc) } gib->nisc = nisc; - if (chsc_sgib((u32)(u64)gib)) { + gib_origin = virt_to_phys(gib); + if (chsc_sgib(gib_origin)) { pr_err("Associating the GIB with the AIV facility failed\n"); free_page((unsigned long)gib); gib = NULL; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bd25076aa19b..39b36562c043 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -573,7 +573,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: - case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -587,6 +586,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: + /* + * Flag bits indicating which extensions are supported. + * If r > 0, the base extension must also be supported/indicated, + * in order to maintain backwards compatibility. + */ + r = KVM_S390_MEMOP_EXTENSION_CAP_BASE | + KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -2753,41 +2761,33 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) return r; } -static bool access_key_invalid(u8 access_key) -{ - return access_key > 0xf; -} - -static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags) { - void __user *uaddr = (void __user *)mop->buf; - u64 supported_flags; - void *tmpbuf = NULL; - int r, srcu_idx; - - supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; if (mop->flags & ~supported_flags || !mop->size) return -EINVAL; if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; - /* - * This is technically a heuristic only, if the kvm->lock is not - * taken, it is not guaranteed that the vm is/remains non-protected. - * This is ok from a kernel perspective, wrongdoing is detected - * on the access, -EFAULT is returned and the vm may crash the - * next time it accesses the memory in question. - * There is no sane usecase to do switching and a memop on two - * different CPUs at the same time. - */ - if (kvm_s390_pv_get_handle(kvm)) - return -EINVAL; if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { - if (access_key_invalid(mop->key)) + if (mop->key > 0xf) return -EINVAL; } else { mop->key = 0; } + return 0; +} + +static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + enum gacc_mode acc_mode; + void *tmpbuf = NULL; + int r, srcu_idx; + + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION | + KVM_S390_MEMOP_F_CHECK_ONLY); + if (r) + return r; + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -2801,35 +2801,25 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) goto out_unlock; } - switch (mop->op) { - case KVM_S390_MEMOP_ABSOLUTE_READ: { - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); - } else { - r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, - mop->size, GACC_FETCH, mop->key); - if (r == 0) { - if (copy_to_user(uaddr, tmpbuf, mop->size)) - r = -EFAULT; - } - } - break; + acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE; + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key); + goto out_unlock; } - case KVM_S390_MEMOP_ABSOLUTE_WRITE: { - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); - } else { - if (copy_from_user(tmpbuf, uaddr, mop->size)) { - r = -EFAULT; - break; - } - r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, - mop->size, GACC_STORE, mop->key); + if (acc_mode == GACC_FETCH) { + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_FETCH, mop->key); + if (r) + goto out_unlock; + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } else { + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + goto out_unlock; } - break; - } - default: - r = -EINVAL; + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_STORE, mop->key); } out_unlock: @@ -2839,6 +2829,75 @@ out_unlock: return r; } +static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + void __user *old_addr = (void __user *)mop->old_addr; + union { + __uint128_t quad; + char raw[sizeof(__uint128_t)]; + } old = { .quad = 0}, new = { .quad = 0 }; + unsigned int off_in_quad = sizeof(new) - mop->size; + int r, srcu_idx; + bool success; + + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION); + if (r) + return r; + /* + * This validates off_in_quad. Checking that size is a power + * of two is not necessary, as cmpxchg_guest_abs_with_key + * takes care of that + */ + if (mop->size > sizeof(new)) + return -EINVAL; + if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) + return -EFAULT; + if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size)) + return -EFAULT; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_is_error_gpa(kvm, mop->gaddr)) { + r = PGM_ADDRESSING; + goto out_unlock; + } + + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad, + new.quad, mop->key, &success); + if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size)) + r = -EFAULT; + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return r; +} + +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + /* + * This is technically a heuristic only, if the kvm->lock is not + * taken, it is not guaranteed that the vm is/remains non-protected. + * This is ok from a kernel perspective, wrongdoing is detected + * on the access, -EFAULT is returned and the vm may crash the + * next time it accesses the memory in question. + * There is no sane usecase to do switching and a memop on two + * different CPUs at the same time. + */ + if (kvm_s390_pv_get_handle(kvm)) + return -EINVAL; + + switch (mop->op) { + case KVM_S390_MEMOP_ABSOLUTE_READ: + case KVM_S390_MEMOP_ABSOLUTE_WRITE: + return kvm_s390_vm_mem_op_abs(kvm, mop); + case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: + return kvm_s390_vm_mem_op_cmpxchg(kvm, mop); + default: + return -EINVAL; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -5238,62 +5297,54 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + enum gacc_mode acc_mode; void *tmpbuf = NULL; - int r = 0; - const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION - | KVM_S390_MEMOP_F_CHECK_ONLY - | KVM_S390_MEMOP_F_SKEY_PROTECTION; + int r; - if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION | + KVM_S390_MEMOP_F_CHECK_ONLY | + KVM_S390_MEMOP_F_SKEY_PROTECTION); + if (r) + return r; + if (mop->ar >= NUM_ACRS) return -EINVAL; - if (mop->size > MEM_OP_MAX_SIZE) - return -E2BIG; if (kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; - if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { - if (access_key_invalid(mop->key)) - return -EINVAL; - } else { - mop->key = 0; - } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) return -ENOMEM; } - switch (mop->op) { - case KVM_S390_MEMOP_LOGICAL_READ: - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, - GACC_FETCH, mop->key); - break; - } + acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE; + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + acc_mode, mop->key); + goto out_inject; + } + if (acc_mode == GACC_FETCH) { r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size, mop->key); - if (r == 0) { - if (copy_to_user(uaddr, tmpbuf, mop->size)) - r = -EFAULT; - } - break; - case KVM_S390_MEMOP_LOGICAL_WRITE: - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, - GACC_STORE, mop->key); - break; + if (r) + goto out_inject; + if (copy_to_user(uaddr, tmpbuf, mop->size)) { + r = -EFAULT; + goto out_free; } + } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; - break; + goto out_free; } r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size, mop->key); - break; } +out_inject: if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); +out_free: vfree(tmpbuf); return r; } @@ -5622,23 +5673,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; - if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) - return 0; + if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) { + /* + * A few sanity checks. We can have memory slots which have to be + * located/ended at a segment boundary (1MB). The memory in userland is + * ok to be fragmented into various different vmas. It is okay to mmap() + * and munmap() stuff in this slot after doing this call at any time + */ - /* A few sanity checks. We can have memory slots which have to be - located/ended at a segment boundary (1MB). The memory in userland is - ok to be fragmented into various different vmas. It is okay to mmap() - and munmap() stuff in this slot after doing this call at any time */ + if (new->userspace_addr & 0xffffful) + return -EINVAL; - if (new->userspace_addr & 0xffffful) - return -EINVAL; + size = new->npages * PAGE_SIZE; + if (size & 0xffffful) + return -EINVAL; - size = new->npages * PAGE_SIZE; - if (size & 0xffffful) - return -EINVAL; + if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) + return -EINVAL; + } - if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) - return -EINVAL; + if (!kvm->arch.migration_mode) + return 0; + + /* + * Turn off migration mode when: + * - userspace creates a new memslot with dirty logging off, + * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and + * dirty logging is turned off. + * Migration mode expects dirty page logging being enabled to store + * its dirty bitmap. + */ + if (change != KVM_MR_DELETE && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + WARN(kvm_s390_vm_stop_migration(kvm), + "Failed to stop migration mode"); return 0; } |