diff options
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r-- | arch/s390/kvm/Kconfig | 1 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 4 | ||||
-rw-r--r-- | arch/s390/kvm/intercept.c | 41 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 191 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 596 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 19 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 13 | ||||
-rw-r--r-- | arch/s390/kvm/sigp.c | 160 | ||||
-rw-r--r-- | arch/s390/kvm/trace-s390.h | 14 |
9 files changed, 823 insertions, 216 deletions
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 646db9c467d1..5fce52cf0e57 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING + select SRCU ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 8a1be9017730..267523cac6de 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -357,8 +357,8 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, union asce asce; ctlreg0.val = vcpu->arch.sie_block->gcr[0]; - edat1 = ctlreg0.edat && test_vfacility(8); - edat2 = edat1 && test_vfacility(78); + edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); + edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); asce.val = get_vcpu_asce(vcpu); if (asce.r) goto real_address; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 81c77ab8102e..bebd2157edd0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -68,18 +68,27 @@ static int handle_noop(struct kvm_vcpu *vcpu) static int handle_stop(struct kvm_vcpu *vcpu) { + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc = 0; - unsigned int action_bits; + uint8_t flags, stop_pending; vcpu->stat.exit_stop_request++; - trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits); - action_bits = vcpu->arch.local_int.action_bits; + /* delay the stop if any non-stop irq is pending */ + if (kvm_s390_vcpu_has_irq(vcpu, 1)) + return 0; + + /* avoid races with the injection/SIGP STOP code */ + spin_lock(&li->lock); + flags = li->irq.stop.flags; + stop_pending = kvm_s390_is_stop_irq_pending(vcpu); + spin_unlock(&li->lock); - if (!(action_bits & ACTION_STOP_ON_STOP)) + trace_kvm_s390_stop_request(stop_pending, flags); + if (!stop_pending) return 0; - if (action_bits & ACTION_STORE_ON_STOP) { + if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) { rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_NOADDR); if (rc) @@ -279,11 +288,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) irq.type = KVM_S390_INT_CPU_TIMER; break; case EXT_IRQ_EXTERNAL_CALL: - if (kvm_s390_si_ext_call_pending(vcpu)) - return 0; irq.type = KVM_S390_INT_EXTERNAL_CALL; irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr; - break; + rc = kvm_s390_inject_vcpu(vcpu, &irq); + /* ignore if another external call is already pending */ + if (rc == -EBUSY) + return 0; + return rc; default: return -EOPNOTSUPP; } @@ -307,17 +318,19 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); /* Make sure that the source is paged-in */ - srcaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg2]); - if (kvm_is_error_gpa(vcpu->kvm, srcaddr)) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], + &srcaddr, 0); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); if (rc != 0) return rc; /* Make sure that the destination is paged-in */ - dstaddr = kvm_s390_real_to_abs(vcpu, vcpu->run->s.regs.gprs[reg1]); - if (kvm_is_error_gpa(vcpu->kvm, dstaddr)) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], + &dstaddr, 1); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); if (rc != 0) return rc; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f00f31e66cd8..073b5f387d1d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -19,6 +19,7 @@ #include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> +#include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -159,6 +160,12 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + /* + * STOP irqs will never be actively delivered. They are triggered via + * intercept requests and cleared when the stop intercept is performed. + */ + __clear_bit(IRQ_PEND_SIGP_STOP, &active_mask); + return active_mask; } @@ -186,9 +193,6 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) LCTL_CR10 | LCTL_CR11); vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT); } - - if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) - atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags); } static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) @@ -216,11 +220,18 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->lctl |= LCTL_CR14; } +static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) +{ + if (kvm_s390_is_stop_irq_pending(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); +} + /* Set interception request for non-deliverable local interrupts */ static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) { set_intercept_indicators_ext(vcpu); set_intercept_indicators_mchk(vcpu); + set_intercept_indicators_stop(vcpu); } static void __set_intercept_indicator(struct kvm_vcpu *vcpu, @@ -392,18 +403,6 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } -static int __must_check __deliver_stop(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); - vcpu->stat.deliver_stop_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP, - 0, 0); - - __set_cpuflag(vcpu, CPUSTAT_STOP_INT); - clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs); - return 0; -} - static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -705,7 +704,6 @@ static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc, [IRQ_PEND_EXT_CPU_TIMER] = __deliver_cpu_timer, [IRQ_PEND_RESTART] = __deliver_restart, - [IRQ_PEND_SIGP_STOP] = __deliver_stop, [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, }; @@ -738,21 +736,20 @@ static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, return rc; } -/* Check whether SIGP interpretation facility has an external call pending */ -int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu) +/* Check whether an external call is pending (deliverable or not) */ +int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { - atomic_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - if (!psw_extint_disabled(vcpu) && - (vcpu->arch.sie_block->gcr[0] & 0x2000ul) && - (atomic_read(sigp_ctrl) & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) - return 1; + if (!sclp_has_sigpif()) + return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return 0; + return (sigp_ctrl & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); } -int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) { struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *inti; @@ -773,7 +770,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) if (!rc && kvm_cpu_has_pending_timer(vcpu)) rc = 1; - if (!rc && kvm_s390_si_ext_call_pending(vcpu)) + /* external call pending and deliverable */ + if (!rc && kvm_s390_ext_call_pending(vcpu) && + !psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x2000ul)) + rc = 1; + + if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) rc = 1; return rc; @@ -804,14 +807,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - __set_cpu_idle(vcpu); if (!ckc_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + __set_cpu_idle(vcpu); goto no_timer; } now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + + /* underflow */ + if (vcpu->arch.sie_block->ckc < now) + return 0; + + __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: @@ -820,7 +829,7 @@ no_timer: __unset_cpu_idle(vcpu); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); + hrtimer_cancel(&vcpu->arch.ckc_timer); return 0; } @@ -840,10 +849,20 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; + u64 now, sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - kvm_s390_vcpu_wakeup(vcpu); + now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + /* + * If the monotonic clock runs faster than the tod clock we might be + * woken up too early and have to go back to sleep to avoid deadlocks. + */ + if (vcpu->arch.sie_block->ckc > now && + hrtimer_forward_now(timer, ns_to_ktime(sltime))) + return HRTIMER_RESTART; + kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; } @@ -859,8 +878,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) /* clear pending external calls set by sigp interpretation facility */ atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags); - atomic_clear_mask(SIGP_CTRL_C, - &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -984,18 +1002,43 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) +{ + unsigned char new_val, old_val; + uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); + old_val = *sigp_ctrl & ~SIGP_CTRL_C; + if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { + /* another external call is pending */ + return -EBUSY; + } + atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_extcall_info *extcall = &li->irq.extcall; + uint16_t src_id = irq->u.extcall.code; VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", - irq->u.extcall.code); + src_id); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, - irq->u.extcall.code, 0, 2); + src_id, 0, 2); + + /* sending vcpu invalid */ + if (src_id >= KVM_MAX_VCPUS || + kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + return -EINVAL; + if (sclp_has_sigpif()) + return __inject_extcall_sigpif(vcpu, src_id); + + if (!test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) + return -EBUSY; *extcall = irq->u.extcall; - set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); return 0; } @@ -1006,23 +1049,41 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_prefix_info *prefix = &li->irq.prefix; VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", - prefix->address); + irq->u.prefix.address); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, - prefix->address, 0, 2); + irq->u.prefix.address, 0, 2); + + if (!is_vcpu_stopped(vcpu)) + return -EBUSY; *prefix = irq->u.prefix; set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); return 0; } +#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS) static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_stop_info *stop = &li->irq.stop; + int rc = 0; trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); - li->action_bits |= ACTION_STOP_ON_STOP; - set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); + if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) + return -EINVAL; + + if (is_vcpu_stopped(vcpu)) { + if (irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS) + rc = kvm_s390_store_status_unloaded(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + return rc; + } + + if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs)) + return -EBUSY; + stop->flags = irq->u.stop.flags; + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); return 0; } @@ -1042,14 +1103,13 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_emerg_info *emerg = &li->irq.emerg; VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", irq->u.emerg.code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, - emerg->code, 0, 2); + irq->u.emerg.code, 0, 2); - set_bit(emerg->code, li->sigp_emerg_pending); + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); return 0; @@ -1061,9 +1121,9 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_mchk_info *mchk = &li->irq.mchk; VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", - mchk->mcic); + irq->u.mchk.mcic); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, - mchk->mcic, 2); + irq->u.mchk.mcic, 2); /* * Because repressible machine checks can be indicated along with @@ -1121,7 +1181,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, if ((!schid && !cr6) || (schid && cr6)) return NULL; - mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); inti = NULL; @@ -1149,7 +1208,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, if (list_empty(&fi->list)) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); return inti; } @@ -1162,7 +1220,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) int sigcpu; int rc = 0; - mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { @@ -1187,6 +1244,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) list_add_tail(&inti->list, &iter->list); } atomic_set(&fi->active, 1); + if (atomic_read(&kvm->online_vcpus) == 0) + goto unlock_fi; sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { do { @@ -1213,7 +1272,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); unlock_fi: spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); return rc; } @@ -1221,6 +1279,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int) { struct kvm_s390_interrupt_info *inti; + int rc; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) @@ -1239,7 +1298,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->ext.ext_params = s390int->parm; break; case KVM_S390_INT_PFAULT_DONE: - inti->type = s390int->type; inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_MCHK: @@ -1268,7 +1326,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 2); - return __inject_vm(kvm, inti); + rc = __inject_vm(kvm, inti); + if (rc) + kfree(inti); + return rc; } void kvm_s390_reinject_io_int(struct kvm *kvm, @@ -1290,13 +1351,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, case KVM_S390_SIGP_SET_PREFIX: irq->u.prefix.address = s390int->parm; break; + case KVM_S390_SIGP_STOP: + irq->u.stop.flags = s390int->parm; + break; case KVM_S390_INT_EXTERNAL_CALL: - if (irq->u.extcall.code & 0xffff0000) + if (s390int->parm & 0xffff0000) return -EINVAL; irq->u.extcall.code = s390int->parm; break; case KVM_S390_INT_EMERGENCY: - if (irq->u.emerg.code & 0xffff0000) + if (s390int->parm & 0xffff0000) return -EINVAL; irq->u.emerg.code = s390int->parm; break; @@ -1307,6 +1371,23 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, return 0; } +int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); +} + +void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + spin_lock(&li->lock); + li->irq.stop.flags = 0; + clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); + spin_unlock(&li->lock); +} + int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1363,7 +1444,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) struct kvm_s390_float_interrupt *fi; struct kvm_s390_interrupt_info *n, *inti = NULL; - mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); list_for_each_entry_safe(inti, n, &fi->list, list) { @@ -1373,7 +1453,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) fi->irq_count = 0; atomic_set(&fi->active, 0); spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); } static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, @@ -1413,7 +1492,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) int ret = 0; int n = 0; - mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; spin_lock(&fi->lock); @@ -1432,7 +1510,6 @@ static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) } spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); return ret < 0 ? ret : n; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3e09801e3104..0c3623927563 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -22,6 +22,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/module.h> +#include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> #include <asm/asm-offsets.h> @@ -29,7 +30,6 @@ #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> -#include <asm/facility.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, @@ -98,15 +99,20 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -unsigned long *vfacilities; -static struct gmap_notifier gmap_notifier; +/* upper facilities limit for kvm */ +unsigned long kvm_s390_fac_list_mask[] = { + 0xff82fffbf4fc2000UL, + 0x005c000000000000UL, +}; -/* test availability of vfacility */ -int test_vfacility(unsigned long nr) +unsigned long kvm_s390_fac_list_mask_size(void) { - return __test_facility(nr, (void *) vfacilities); + BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64); + return ARRAY_SIZE(kvm_s390_fac_list_mask); } +static struct gmap_notifier gmap_notifier; + /* Section: not file related */ int kvm_arch_hardware_enable(void) { @@ -166,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_S390_USER_SIGP: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -254,6 +261,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) kvm->arch.use_irqchip = 1; r = 0; break; + case KVM_CAP_S390_USER_SIGP: + kvm->arch.user_sigp = 1; + r = 0; + break; default: r = -EINVAL; break; @@ -261,7 +272,24 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) return r; } -static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) +static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->attr) { + case KVM_S390_VM_MEM_LIMIT_SIZE: + ret = 0; + if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + ret = -EFAULT; + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) { int ret; unsigned int idx; @@ -283,6 +311,36 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) mutex_unlock(&kvm->lock); ret = 0; break; + case KVM_S390_VM_MEM_LIMIT_SIZE: { + unsigned long new_limit; + + if (kvm_is_ucontrol(kvm)) + return -EINVAL; + + if (get_user(new_limit, (u64 __user *)attr->addr)) + return -EFAULT; + + if (new_limit > kvm->arch.gmap->asce_end) + return -E2BIG; + + ret = -EBUSY; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) == 0) { + /* gmap_alloc will round the limit up */ + struct gmap *new = gmap_alloc(current->mm, new_limit); + + if (!new) { + ret = -ENOMEM; + } else { + gmap_free(kvm->arch.gmap); + new->private = kvm; + kvm->arch.gmap = new; + ret = 0; + } + } + mutex_unlock(&kvm->lock); + break; + } default: ret = -ENXIO; break; @@ -290,13 +348,276 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr) return ret; } +static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu); + +static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_vcpu *vcpu; + int i; + + if (!test_kvm_facility(kvm, 76)) + return -EINVAL; + + mutex_lock(&kvm->lock); + switch (attr->attr) { + case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: + get_random_bytes( + kvm->arch.crypto.crycb->aes_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + kvm->arch.crypto.aes_kw = 1; + break; + case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: + get_random_bytes( + kvm->arch.crypto.crycb->dea_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); + kvm->arch.crypto.dea_kw = 1; + break; + case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: + kvm->arch.crypto.aes_kw = 0; + memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, + sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + break; + case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: + kvm->arch.crypto.dea_kw = 0; + memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, + sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); + break; + default: + mutex_unlock(&kvm->lock); + return -ENXIO; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_s390_vcpu_crypto_setup(vcpu); + exit_sie(vcpu); + } + mutex_unlock(&kvm->lock); + return 0; +} + +static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) +{ + u8 gtod_high; + + if (copy_from_user(>od_high, (void __user *)attr->addr, + sizeof(gtod_high))) + return -EFAULT; + + if (gtod_high != 0) + return -EINVAL; + + return 0; +} + +static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_vcpu *cur_vcpu; + unsigned int vcpu_idx; + u64 host_tod, gtod; + int r; + + if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) + return -EFAULT; + + r = store_tod_clock(&host_tod); + if (r) + return r; + + mutex_lock(&kvm->lock); + kvm->arch.epoch = gtod - host_tod; + kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) { + cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; + exit_sie(cur_vcpu); + } + mutex_unlock(&kvm->lock); + return 0; +} + +static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + if (attr->flags) + return -EINVAL; + + switch (attr->attr) { + case KVM_S390_VM_TOD_HIGH: + ret = kvm_s390_set_tod_high(kvm, attr); + break; + case KVM_S390_VM_TOD_LOW: + ret = kvm_s390_set_tod_low(kvm, attr); + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) +{ + u8 gtod_high = 0; + + if (copy_to_user((void __user *)attr->addr, >od_high, + sizeof(gtod_high))) + return -EFAULT; + + return 0; +} + +static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) +{ + u64 host_tod, gtod; + int r; + + r = store_tod_clock(&host_tod); + if (r) + return r; + + gtod = host_tod + kvm->arch.epoch; + if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) + return -EFAULT; + + return 0; +} + +static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret; + + if (attr->flags) + return -EINVAL; + + switch (attr->attr) { + case KVM_S390_VM_TOD_HIGH: + ret = kvm_s390_get_tod_high(kvm, attr); + break; + case KVM_S390_VM_TOD_LOW: + ret = kvm_s390_get_tod_low(kvm, attr); + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_processor *proc; + int ret = 0; + + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + ret = -EBUSY; + goto out; + } + proc = kzalloc(sizeof(*proc), GFP_KERNEL); + if (!proc) { + ret = -ENOMEM; + goto out; + } + if (!copy_from_user(proc, (void __user *)attr->addr, + sizeof(*proc))) { + memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, + sizeof(struct cpuid)); + kvm->arch.model.ibc = proc->ibc; + memcpy(kvm->arch.model.fac->kvm, proc->fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); + } else + ret = -EFAULT; + kfree(proc); +out: + mutex_unlock(&kvm->lock); + return ret; +} + +static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->attr) { + case KVM_S390_VM_CPU_PROCESSOR: + ret = kvm_s390_set_processor(kvm, attr); + break; + } + return ret; +} + +static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_processor *proc; + int ret = 0; + + proc = kzalloc(sizeof(*proc), GFP_KERNEL); + if (!proc) { + ret = -ENOMEM; + goto out; + } + memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); + proc->ibc = kvm->arch.model.ibc; + memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE); + if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) + ret = -EFAULT; + kfree(proc); +out: + return ret; +} + +static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_machine *mach; + int ret = 0; + + mach = kzalloc(sizeof(*mach), GFP_KERNEL); + if (!mach) { + ret = -ENOMEM; + goto out; + } + get_cpu_id((struct cpuid *) &mach->cpuid); + mach->ibc = sclp_get_ibc(); + memcpy(&mach->fac_mask, kvm_s390_fac_list_mask, + kvm_s390_fac_list_mask_size() * sizeof(u64)); + memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, + S390_ARCH_FAC_LIST_SIZE_U64); + if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) + ret = -EFAULT; + kfree(mach); +out: + return ret; +} + +static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->attr) { + case KVM_S390_VM_CPU_PROCESSOR: + ret = kvm_s390_get_processor(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE: + ret = kvm_s390_get_machine(kvm, attr); + break; + } + return ret; +} + static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) { int ret; switch (attr->group) { case KVM_S390_VM_MEM_CTRL: - ret = kvm_s390_mem_control(kvm, attr); + ret = kvm_s390_set_mem_control(kvm, attr); + break; + case KVM_S390_VM_TOD: + ret = kvm_s390_set_tod(kvm, attr); + break; + case KVM_S390_VM_CPU_MODEL: + ret = kvm_s390_set_cpu_model(kvm, attr); + break; + case KVM_S390_VM_CRYPTO: + ret = kvm_s390_vm_set_crypto(kvm, attr); break; default: ret = -ENXIO; @@ -308,7 +629,24 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr) { - return -ENXIO; + int ret; + + switch (attr->group) { + case KVM_S390_VM_MEM_CTRL: + ret = kvm_s390_get_mem_control(kvm, attr); + break; + case KVM_S390_VM_TOD: + ret = kvm_s390_get_tod(kvm, attr); + break; + case KVM_S390_VM_CPU_MODEL: + ret = kvm_s390_get_cpu_model(kvm, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; } static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) @@ -320,6 +658,42 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: case KVM_S390_VM_MEM_CLR_CMMA: + case KVM_S390_VM_MEM_LIMIT_SIZE: + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + break; + case KVM_S390_VM_TOD: + switch (attr->attr) { + case KVM_S390_VM_TOD_LOW: + case KVM_S390_VM_TOD_HIGH: + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + break; + case KVM_S390_VM_CPU_MODEL: + switch (attr->attr) { + case KVM_S390_VM_CPU_PROCESSOR: + case KVM_S390_VM_CPU_MACHINE: + ret = 0; + break; + default: + ret = -ENXIO; + break; + } + break; + case KVM_S390_VM_CRYPTO: + switch (attr->attr) { + case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: + case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: + case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: + case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: ret = 0; break; default: @@ -401,9 +775,61 @@ long kvm_arch_vm_ioctl(struct file *filp, return r; } +static int kvm_s390_query_ap_config(u8 *config) +{ + u32 fcn_code = 0x04000000UL; + u32 cc; + + asm volatile( + "lgr 0,%1\n" + "lgr 2,%2\n" + ".long 0xb2af0000\n" /* PQAP(QCI) */ + "ipm %0\n" + "srl %0,28\n" + : "=r" (cc) + : "r" (fcn_code), "r" (config) + : "cc", "0", "2", "memory" + ); + + return cc; +} + +static int kvm_s390_apxa_installed(void) +{ + u8 config[128]; + int cc; + + if (test_facility(2) && test_facility(12)) { + cc = kvm_s390_query_ap_config(config); + + if (cc) + pr_err("PQAP(QCI) failed with cc=%d", cc); + else + return config[0] & 0x40; + } + + return 0; +} + +static void kvm_s390_set_crycb_format(struct kvm *kvm) +{ + kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb; + + if (kvm_s390_apxa_installed()) + kvm->arch.crypto.crycbd |= CRYCB_FORMAT2; + else + kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; +} + +static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) +{ + get_cpu_id(cpu_id); + cpu_id->version = 0xff; +} + static int kvm_s390_crypto_init(struct kvm *kvm) { - if (!test_vfacility(76)) + if (!test_kvm_facility(kvm, 76)) return 0; kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), @@ -411,15 +837,18 @@ static int kvm_s390_crypto_init(struct kvm *kvm) if (!kvm->arch.crypto.crycb) return -ENOMEM; - kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb | - CRYCB_FORMAT1; + kvm_s390_set_crycb_format(kvm); + + /* Disable AES/DEA protected key functions by default */ + kvm->arch.crypto.aes_kw = 0; + kvm->arch.crypto.dea_kw = 0; return 0; } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - int rc; + int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -454,6 +883,46 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_nodbf; + /* + * The architectural maximum amount of facilities is 16 kbit. To store + * this amount, 2 kbyte of memory is required. Thus we need a full + * page to hold the active copy (arch.model.fac->sie) and the current + * facilities set (arch.model.fac->kvm). Its address size has to be + * 31 bits and word aligned. + */ + kvm->arch.model.fac = + (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!kvm->arch.model.fac) + goto out_nofac; + + memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list, + S390_ARCH_FAC_LIST_SIZE_U64); + + /* + * If this KVM host runs *not* in a LPAR, relax the facility bits + * of the kvm facility mask by all missing facilities. This will allow + * to determine the right CPU model by means of the remaining facilities. + * Live guest migration must prohibit the migration of KVMs running in + * a LPAR to non LPAR hosts. + */ + if (!MACHINE_IS_LPAR) + for (i = 0; i < kvm_s390_fac_list_mask_size(); i++) + kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i]; + + /* + * Apply the kvm facility mask to limit the kvm supported/tolerated + * facility list. + */ + for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { + if (i < kvm_s390_fac_list_mask_size()) + kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i]; + else + kvm->arch.model.fac->kvm[i] = 0UL; + } + + kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); + kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; + if (kvm_s390_crypto_init(kvm) < 0) goto out_crypto; @@ -477,6 +946,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.css_support = 0; kvm->arch.use_irqchip = 0; + kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); @@ -484,6 +954,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) out_nogmap: kfree(kvm->arch.crypto.crycb); out_crypto: + free_page((unsigned long)kvm->arch.model.fac); +out_nofac: debug_unregister(kvm->arch.dbf); out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -536,6 +1008,7 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); + free_page((unsigned long)kvm->arch.model.fac); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); @@ -546,25 +1019,30 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } /* Section: vcpu related */ +static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); + if (!vcpu->arch.gmap) + return -ENOMEM; + vcpu->arch.gmap->private = vcpu->kvm; + + return 0; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); - if (!vcpu->arch.gmap) - return -ENOMEM; - vcpu->arch.gmap->private = vcpu->kvm; - return 0; - } - - vcpu->arch.gmap = vcpu->kvm->arch.gmap; vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + + if (kvm_is_ucontrol(vcpu->kvm)) + return __kvm_ucontrol_vcpu_init(vcpu); + return 0; } @@ -615,16 +1093,27 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) kvm_s390_clear_local_irqs(vcpu); } -int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { - return 0; + mutex_lock(&vcpu->kvm->lock); + vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; + mutex_unlock(&vcpu->kvm->lock); + if (!kvm_is_ucontrol(vcpu->kvm)) + vcpu->arch.gmap = vcpu->kvm->arch.gmap; } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) { - if (!test_vfacility(76)) + if (!test_kvm_facility(vcpu->kvm, 76)) return; + vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); + + if (vcpu->kvm->arch.crypto.aes_kw) + vcpu->arch.sie_block->ecb3 |= ECB3_AES; + if (vcpu->kvm->arch.crypto.dea_kw) + vcpu->arch.sie_block->ecb3 |= ECB3_DEA; + vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; } @@ -654,14 +1143,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) CPUSTAT_STOPPED | CPUSTAT_GED); vcpu->arch.sie_block->ecb = 6; - if (test_vfacility(50) && test_vfacility(73)) + if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; vcpu->arch.sie_block->ecb2 = 8; - vcpu->arch.sie_block->eca = 0xD1002000U; + vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp_has_siif()) vcpu->arch.sie_block->eca |= 1; - vcpu->arch.sie_block->fac = (int) (long) vfacilities; + if (sclp_has_sigpif()) + vcpu->arch.sie_block->eca |= 0x10000000U; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | ICTL_TPROT; @@ -670,10 +1160,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; - get_cpu_id(&vcpu->arch.cpu_id); - vcpu->arch.cpu_id.version = 0xff; + + mutex_lock(&vcpu->kvm->lock); + vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id; + memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm, + S390_ARCH_FAC_LIST_SIZE_BYTE); + vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc; + mutex_unlock(&vcpu->kvm->lock); kvm_s390_vcpu_crypto_setup(vcpu); @@ -717,6 +1212,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); } + vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; @@ -741,7 +1237,7 @@ out: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return kvm_cpu_has_interrupt(vcpu); + return kvm_s390_vcpu_has_irq(vcpu, 0); } void s390_vcpu_block(struct kvm_vcpu *vcpu) @@ -869,6 +1365,8 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_S390_PFTOKEN: r = get_user(vcpu->arch.pfault_token, (u64 __user *)reg->addr); + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + kvm_clear_async_pf_completion_queue(vcpu); break; case KVM_REG_S390_PFCOMPARE: r = get_user(vcpu->arch.pfault_compare, @@ -1176,7 +1674,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu) return 0; if (psw_extint_disabled(vcpu)) return 0; - if (kvm_cpu_has_interrupt(vcpu)) + if (kvm_s390_vcpu_has_irq(vcpu, 0)) return 0; if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) return 0; @@ -1341,6 +1839,8 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.pfault_token = kvm_run->s.regs.pft; vcpu->arch.pfault_select = kvm_run->s.regs.pfs; vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; + if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) + kvm_clear_async_pf_completion_queue(vcpu); } kvm_run->kvm_dirty_regs = 0; } @@ -1559,15 +2059,10 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); - /* Need to lock access to action_bits to avoid a SIGP race condition */ - spin_lock(&vcpu->arch.local_int.lock); - atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); - /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ - vcpu->arch.local_int.action_bits &= - ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP); - spin_unlock(&vcpu->arch.local_int.lock); + kvm_s390_clear_stop_irq(vcpu); + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -1783,30 +2278,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { - int ret; - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - if (ret) - return ret; - - /* - * guests can ask for up to 255+1 double words, we need a full page - * to hold the maximum amount of facilities. On the other hand, we - * only set facilities that are known to work in KVM. - */ - vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); - if (!vfacilities) { - kvm_exit(); - return -ENOMEM; - } - memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); - vfacilities[0] &= 0xff82fffbf47c2000UL; - vfacilities[1] &= 0x005c000000000000UL; - return 0; + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } static void __exit kvm_s390_exit(void) { - free_page((unsigned long) vfacilities); kvm_exit(); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a8f3d9b71c11..985c2114d7ef 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -18,12 +18,10 @@ #include <linux/hrtimer.h> #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <asm/facility.h> typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); -/* declare vfacilities extern */ -extern unsigned long *vfacilities; - /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) #define TDB_FORMAT1 1 @@ -127,6 +125,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } +/* test availability of facility in a kvm intance */ +static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) +{ + return __test_facility(nr, kvm->arch.model.fac->kvm); +} + /* are cpu states controlled by user space */ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) { @@ -183,7 +187,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); /* is cmma enabled */ bool kvm_s390_cmma_enabled(struct kvm *kvm); -int test_vfacility(unsigned long nr); +unsigned long kvm_s390_fac_list_mask_size(void); +extern unsigned long kvm_s390_fac_list_mask[]; /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); @@ -228,11 +233,13 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, struct kvm_s390_irq *s390irq); /* implemented in interrupt.c */ -int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop); int psw_extint_disabled(struct kvm_vcpu *vcpu); void kvm_s390_destroy_adapters(struct kvm *kvm); -int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); +int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; +int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); +void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 1be578d64dfc..bdd9b5b17e03 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -337,19 +337,24 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) static int handle_stfl(struct kvm_vcpu *vcpu) { int rc; + unsigned int fac; vcpu->stat.instruction_stfl++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + /* + * We need to shift the lower 32 facility bits (bit 0-31) from a u64 + * into a u32 memory representation. They will remain bits 0-31. + */ + fac = *vcpu->kvm->arch.model.fac->sie >> 32; rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), - vfacilities, 4); + &fac, sizeof(fac)); if (rc) return rc; - VCPU_EVENT(vcpu, 5, "store facility list value %x", - *(unsigned int *) vfacilities); - trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities); + VCPU_EVENT(vcpu, 5, "store facility list value %x", fac); + trace_kvm_s390_handle_stfl(vcpu, fac); return 0; } diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 6651f9f73973..23b1e86b2122 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -26,15 +26,17 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, struct kvm_s390_local_interrupt *li; int cpuflags; int rc; + int ext_call_pending; li = &dst_vcpu->arch.local_int; cpuflags = atomic_read(li->cpuflags); - if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) + ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu); + if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending) rc = SIGP_CC_ORDER_CODE_ACCEPTED; else { *reg &= 0xffffffff00000000UL; - if (cpuflags & CPUSTAT_ECALL_PEND) + if (ext_call_pending) *reg |= SIGP_STATUS_EXT_CALL_PENDING; if (cpuflags & CPUSTAT_STOPPED) *reg |= SIGP_STATUS_STOPPED; @@ -96,7 +98,7 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, } static int __sigp_external_call(struct kvm_vcpu *vcpu, - struct kvm_vcpu *dst_vcpu) + struct kvm_vcpu *dst_vcpu, u64 *reg) { struct kvm_s390_irq irq = { .type = KVM_S390_INT_EXTERNAL_CALL, @@ -105,45 +107,31 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, int rc; rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); - if (!rc) + if (rc == -EBUSY) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_EXT_CALL_PENDING; + return SIGP_CC_STATUS_STORED; + } else if (rc == 0) { VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", dst_vcpu->vcpu_id); - - return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; -} - -static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action) -{ - struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int; - int rc = SIGP_CC_ORDER_CODE_ACCEPTED; - - spin_lock(&li->lock); - if (li->action_bits & ACTION_STOP_ON_STOP) { - /* another SIGP STOP is pending */ - rc = SIGP_CC_BUSY; - goto out; } - if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { - if ((action & ACTION_STORE_ON_STOP) != 0) - rc = -ESHUTDOWN; - goto out; - } - set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); - li->action_bits |= action; - atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); - kvm_s390_vcpu_wakeup(dst_vcpu); -out: - spin_unlock(&li->lock); - return rc; + return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED; } static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) { + struct kvm_s390_irq irq = { + .type = KVM_S390_SIGP_STOP, + }; int rc; - rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP); - VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id); + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); + if (rc == -EBUSY) + rc = SIGP_CC_BUSY; + else if (rc == 0) + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", + dst_vcpu->vcpu_id); return rc; } @@ -151,20 +139,18 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu) static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u64 *reg) { + struct kvm_s390_irq irq = { + .type = KVM_S390_SIGP_STOP, + .u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS, + }; int rc; - rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP | - ACTION_STORE_ON_STOP); - VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", - dst_vcpu->vcpu_id); - - if (rc == -ESHUTDOWN) { - /* If the CPU has already been stopped, we still have - * to save the status when doing stop-and-store. This - * has to be done after unlocking all spinlocks. */ - rc = kvm_s390_store_status_unloaded(dst_vcpu, - KVM_S390_STORE_STATUS_NOADDR); - } + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); + if (rc == -EBUSY) + rc = SIGP_CC_BUSY; + else if (rc == 0) + VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x", + dst_vcpu->vcpu_id); return rc; } @@ -197,41 +183,33 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, u32 address, u64 *reg) { - struct kvm_s390_local_interrupt *li; + struct kvm_s390_irq irq = { + .type = KVM_S390_SIGP_SET_PREFIX, + .u.prefix.address = address & 0x7fffe000u, + }; int rc; - li = &dst_vcpu->arch.local_int; - /* * Make sure the new value is valid memory. We only need to check the * first page, since address is 8k aligned and memory pieces are always * at least 1MB aligned and have at least a size of 1MB. */ - address &= 0x7fffe000u; - if (kvm_is_error_gpa(vcpu->kvm, address)) { + if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INVALID_PARAMETER; return SIGP_CC_STATUS_STORED; } - spin_lock(&li->lock); - /* cpu must be in stopped state */ - if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { + rc = kvm_s390_inject_vcpu(dst_vcpu, &irq); + if (rc == -EBUSY) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; - rc = SIGP_CC_STATUS_STORED; - goto out_li; + return SIGP_CC_STATUS_STORED; + } else if (rc == 0) { + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", + dst_vcpu->vcpu_id, irq.u.prefix.address); } - li->irq.prefix.address = address; - set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); - kvm_s390_vcpu_wakeup(dst_vcpu); - rc = SIGP_CC_ORDER_CODE_ACCEPTED; - - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id, - address); -out_li: - spin_unlock(&li->lock); return rc; } @@ -242,9 +220,7 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, int flags; int rc; - spin_lock(&dst_vcpu->arch.local_int.lock); flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); - spin_unlock(&dst_vcpu->arch.local_int.lock); if (!(flags & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; @@ -291,8 +267,9 @@ static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu, /* handle (RE)START in user space */ int rc = -EOPNOTSUPP; + /* make sure we don't race with STOP irq injection */ spin_lock(&li->lock); - if (li->action_bits & ACTION_STOP_ON_STOP) + if (kvm_s390_is_stop_irq_pending(dst_vcpu)) rc = SIGP_CC_BUSY; spin_unlock(&li->lock); @@ -333,7 +310,7 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, break; case SIGP_EXTERNAL_CALL: vcpu->stat.instruction_sigp_external_call++; - rc = __sigp_external_call(vcpu, dst_vcpu); + rc = __sigp_external_call(vcpu, dst_vcpu, status_reg); break; case SIGP_EMERGENCY_SIGNAL: vcpu->stat.instruction_sigp_emergency++; @@ -394,6 +371,53 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, return rc; } +static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) +{ + if (!vcpu->kvm->arch.user_sigp) + return 0; + + switch (order_code) { + case SIGP_SENSE: + case SIGP_EXTERNAL_CALL: + case SIGP_EMERGENCY_SIGNAL: + case SIGP_COND_EMERGENCY_SIGNAL: + case SIGP_SENSE_RUNNING: + return 0; + /* update counters as we're directly dropping to user space */ + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + break; + case SIGP_STOP_AND_STORE_STATUS: + vcpu->stat.instruction_sigp_stop_store_status++; + break; + case SIGP_STORE_STATUS_AT_ADDRESS: + vcpu->stat.instruction_sigp_store_status++; + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + break; + case SIGP_START: + vcpu->stat.instruction_sigp_start++; + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + break; + case SIGP_INITIAL_CPU_RESET: + vcpu->stat.instruction_sigp_init_cpu_reset++; + break; + case SIGP_CPU_RESET: + vcpu->stat.instruction_sigp_cpu_reset++; + break; + default: + vcpu->stat.instruction_sigp_unknown++; + } + + VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space", + order_code); + + return 1; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -408,6 +432,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu); + if (handle_sigp_order_in_user_space(vcpu, order_code)) + return -EOPNOTSUPP; if (r1 % 2) parameter = vcpu->run->s.regs.gprs[r1]; diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 647e9d6a4818..653a7ec09ef5 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -209,19 +209,21 @@ TRACE_EVENT(kvm_s390_request_resets, * Trace point for a vcpu's stop requests. */ TRACE_EVENT(kvm_s390_stop_request, - TP_PROTO(unsigned int action_bits), - TP_ARGS(action_bits), + TP_PROTO(unsigned char stop_irq, unsigned char flags), + TP_ARGS(stop_irq, flags), TP_STRUCT__entry( - __field(unsigned int, action_bits) + __field(unsigned char, stop_irq) + __field(unsigned char, flags) ), TP_fast_assign( - __entry->action_bits = action_bits; + __entry->stop_irq = stop_irq; + __entry->flags = flags; ), - TP_printk("stop request, action_bits = %08x", - __entry->action_bits) + TP_printk("stop request, stop irq = %u, flags = %08x", + __entry->stop_irq, __entry->flags) ); |