diff options
34 files changed, 614 insertions, 334 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index bc78652b0d07..110484915aa0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3396,6 +3396,7 @@ EOI was received. struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 +#define KVM_EXIT_HYPERV_HCALL 2 __u32 type; union { struct { @@ -3404,6 +3405,11 @@ EOI was received. __u64 evt_page; __u64 msg_page; } synic; + struct { + __u64 input; + __u64 result; + __u64 params[2]; + } hcall; } u; }; /* KVM_EXIT_HYPERV */ diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index d1ad9d5cae46..e3e314cb83e8 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -88,6 +88,8 @@ struct kvm_s390_io_adapter_req { perform a gmap translation for the guest address provided in addr, pin a userspace page for the translated address and add it to the list of mappings + Note: A new mapping will be created unconditionally; therefore, + the calling code should avoid making duplicate mappings. KVM_S390_IO_ADAPTER_UNMAP release a userspace page for the translated address specified in addr diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index f083a168eb35..a9ea8774a45f 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -84,3 +84,55 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write -EFAULT if the given address is not accessible from kernel space -ENOMEM if not enough memory is available to process the ioctl 0 in case of success + +3. GROUP: KVM_S390_VM_TOD +Architectures: s390 + +3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH + +Allows user space to set/get the TOD clock extension (u8). + +Parameters: address of a buffer in user space to store the data (u8) to +Returns: -EFAULT if the given address is not accessible from kernel space + -EINVAL if setting the TOD clock extension to != 0 is not supported + +3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW + +Allows user space to set/get bits 0-63 of the TOD clock register as defined in +the POP (u64). + +Parameters: address of a buffer in user space to store the data (u64) to +Returns: -EFAULT if the given address is not accessible from kernel space + +4. GROUP: KVM_S390_VM_CRYPTO +Architectures: s390 + +4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o) + +Allows user space to enable aes key wrapping, including generating a new +wrapping key. + +Parameters: none +Returns: 0 + +4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o) + +Allows user space to enable dea key wrapping, including generating a new +wrapping key. + +Parameters: none +Returns: 0 + +4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o) + +Allows user space to disable aes key wrapping, clearing the wrapping key. + +Parameters: none +Returns: 0 + +4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o) + +Allows user space to disable dea key wrapping, clearing the wrapping key. + +Parameters: none +Returns: 0 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8959ebb6d2c9..727e7f7b33fd 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -229,17 +229,11 @@ struct kvm_s390_itdb { __u8 data[256]; } __packed; -struct kvm_s390_vregs { - __vector128 vrs[32]; - __u8 reserved200[512]; /* for future vector expansion */ -} __packed; - struct sie_page { struct kvm_s390_sie_block sie_block; __u8 reserved200[1024]; /* 0x0200 */ struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[1280]; /* 0x0700 */ - struct kvm_s390_vregs vregs; /* 0x0c00 */ + __u8 reserved700[2304]; /* 0x0700 */ } __packed; struct kvm_vcpu_stat { diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index fe84bd5fe7ce..347fe5afa419 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -154,6 +154,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) +#define KVM_SYNC_FPRS (1UL << 8) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,9 +169,12 @@ struct kvm_sync_regs { __u64 pft; /* pfault token [PFAULT] */ __u64 pfs; /* pfault select [PFAULT] */ __u64 pfc; /* pfault compare [PFAULT] */ - __u64 vrs[32][2]; /* vector registers */ + union { + __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */ + __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */ + }; __u8 reserved[512]; /* for future vector expansion */ - __u32 fpc; /* only valid with vector registers */ + __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ __u8 padding[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ }; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d30db40437dc..66938d283b77 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, - int write) + enum gacc_mode mode) { union alet alet; struct ale ale; @@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, } } - if (ale.fo == 1 && write) + if (ale.fo == 1 && mode == GACC_STORE) return PGM_PROTECTION; asce->val = aste.asce; @@ -477,25 +477,28 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, int write) + ar_t ar, enum gacc_mode mode) { int rc; - psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec_bits; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; + tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw.as; - if (!psw_bits(*psw).t) { + if (!psw.t) { asce->val = 0; asce->r = 1; return 0; } - switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + if (mode == GACC_IFETCH) + psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; + + switch (psw.as) { case PSW_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; @@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, asce->val = vcpu->arch.sie_block->gcr[13]; return 0; case PSW_AS_ACCREG: - rc = ar_translation(vcpu, asce, ar, write); + rc = ar_translation(vcpu, asce, ar, mode); switch (rc) { case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored * @asce: effective asce - * @write: indicates if access is a write access + * @mode: indicates the access mode to be used * * Translate a guest virtual address into a guest absolute address by means * of dynamic address translation as specified by the architecture. @@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, unsigned long *gpa, const union asce asce, - int write) + enum gacc_mode mode) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, real_address: raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); absolute_address: - if (write && dat_protection) + if (mode == GACC_STORE && dat_protection) return PGM_PROTECTION; if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; @@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - const union asce asce, int write) + const union asce asce, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; - if (write && lap_enabled && is_low_address(ga)) { + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { pgm->code = PGM_PROTECTION; return pgm->code; } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, asce, write); + rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write) + unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long _len, nr_pages, gpa, idx; @@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode == GACC_STORE) rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); else rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); @@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write) + void *data, unsigned long len, enum gacc_mode mode) { unsigned long _len, gpa; int rc = 0; @@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode) rc = write_guest_abs(vcpu, gpa, data, _len); else rc = read_guest_abs(vcpu, gpa, data, _len); @@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * has to take care of this. */ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long *gpa, int write) + unsigned long *gpa, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (write) { + if (mode == GACC_STORE) { rc = pgm->code = PGM_PROTECTION; return rc; } } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, write); + rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; @@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, * check_gva_range - test a range of guest virtual addresses for accessibility */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write) + unsigned long length, enum gacc_mode mode) { unsigned long gpa; unsigned long currlen; @@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, ipte_lock(vcpu); while (length > 0 && !rc) { currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); gva += currlen; length -= currlen; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index ef03726cc661..df0a79dd8159 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return kvm_read_guest(vcpu->kvm, gpa, data, len); } +enum gacc_mode { + GACC_FETCH, + GACC_STORE, + GACC_IFETCH, +}; + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - ar_t ar, unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, enum gacc_mode mode); int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write); + unsigned long length, enum gacc_mode mode); int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write); + unsigned long len, enum gacc_mode mode); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write); + void *data, unsigned long len, enum gacc_mode mode); /** * write_guest - copy data from kernel space to guest space @@ -215,7 +221,7 @@ static inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, GACC_STORE); } /** @@ -235,7 +241,27 @@ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); +} + +/** + * read_guest_instr - copy instruction data from guest space to kernel space + * @vcpu: virtual cpu + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from the current psw address (guest space) to @data (kernel + * space). + * + * The behaviour of read_guest_instr is identical to read_guest, except that + * instruction data will be read from primary space when in home-space or + * address-space mode. + */ +static inline __must_check +int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) +{ + return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, + GACC_IFETCH); } /** diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index d53c10753c46..2e6b54e4d3f9 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + u8 ilen = 0; - /* Use the length of the EXECUTE instruction if necessary */ - if (sie_block->icptstatus & 1) { - ilc = (sie_block->icptstatus >> 4) & 0x6; - if (!ilc) - ilc = 4; + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* instruction only stored for these icptcodes */ + ilen = insn_length(vcpu->arch.sie_block->ipa >> 8); + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilen = (sie_block->icptstatus >> 4) & 0x6; + if (!ilen) + ilen = 4; + } + break; + case ICPT_PROGI: + /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */ + ilen = vcpu->arch.sie_block->pgmilc & 0x6; + break; } - sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); + return ilen; } static int handle_noop(struct kvm_vcpu *vcpu) @@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } -static void __extract_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) { - memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); - pgm_info->code = vcpu->arch.sie_block->iprcc; + struct kvm_s390_pgm_info pgm_info = { + .code = vcpu->arch.sie_block->iprcc, + /* the PSW has already been rewound */ + .flags = KVM_S390_PGM_FLAGS_NO_REWIND, + }; switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; - pgm_info->op_access_id = vcpu->arch.sie_block->oai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.op_access_id = vcpu->arch.sie_block->oai; break; case PGM_MONITOR: - pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; - pgm_info->mon_code = vcpu->arch.sie_block->tecmc; + pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info.mon_code = vcpu->arch.sie_block->tecmc; break; case PGM_VECTOR_PROCESSING: case PGM_DATA: - pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + pgm_info.data_exc_code = vcpu->arch.sie_block->dxc; break; case PGM_PROTECTION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; default: break; } if (vcpu->arch.sie_block->iprcc & PGM_PER) { - pgm_info->per_code = vcpu->arch.sie_block->perc; - pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; - pgm_info->per_address = vcpu->arch.sie_block->peraddr; - pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + pgm_info.per_code = vcpu->arch.sie_block->perc; + pgm_info.per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info.per_address = vcpu->arch.sie_block->peraddr; + pgm_info.per_access_id = vcpu->arch.sie_block->peraid; } + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } /* @@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu) static int handle_prog(struct kvm_vcpu *vcpu) { - struct kvm_s390_pgm_info pgm_info; psw_t psw; int rc; @@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) if (rc) return rc; - __extract_prog_irq(vcpu, &pgm_info); - return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + return inject_prog_on_prog_intercept(vcpu); } /** @@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, 0); + reg2, &srcaddr, GACC_FETCH); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -311,14 +327,14 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, 1); + reg1, &dstaddr, GACC_STORE); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); if (rc != 0) return rc; - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f88ca72c3a77..87e2d1a89d74 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -335,23 +335,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) set_intercept_indicators_stop(vcpu); } -static u16 get_ilc(struct kvm_vcpu *vcpu) -{ - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - return insn_length(vcpu->arch.sie_block->ipa >> 8); - case ICPT_PROGI: - return vcpu->arch.sie_block->pgmilc; - default: - return 0; - } -} - static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -588,7 +571,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; int rc = 0, nullifying = false; - u16 ilc = get_ilc(vcpu); + u16 ilen; spin_lock(&li->lock); pgm_info = li->irq.pgm; @@ -596,8 +579,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", - pgm_info.code, ilc); + ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", + pgm_info.code, ilen); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info.code, 0); @@ -681,10 +665,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } - if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) - kvm_s390_rewind_psw(vcpu, ilc); + if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND)) + kvm_s390_rewind_psw(vcpu, ilen); - rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + /* bit 1+2 of the target are the ilc, so we can directly use ilen */ + rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, @@ -1059,8 +1044,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, irq->u.pgm.code, 0); + if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { + /* auto detection if no valid ILC was given */ + irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK; + irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu); + irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID; + } + if (irq->u.pgm.code == PGM_PER) { li->irq.pgm.code |= PGM_PER; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify PER related information */ li->irq.pgm.per_address = irq->u.pgm.per_address; li->irq.pgm.per_code = irq->u.pgm.per_code; @@ -1069,6 +1062,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) } else if (!(irq->u.pgm.code & PGM_PER)) { li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | irq->u.pgm.code; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify non-PER information */ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; li->irq.pgm.mon_code = irq->u.pgm.mon_code; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4af21c771f9b..28bd5ea1b08f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -274,7 +274,6 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, unsigned long address; struct gmap *gmap = kvm->arch.gmap; - down_read(&gmap->mm->mmap_sem); /* Loop over all guest pages */ last_gfn = memslot->base_gfn + memslot->npages; for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { @@ -282,8 +281,10 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, if (gmap_test_and_clear_dirty(address, gmap)) mark_page_dirty(kvm, cur_gfn); + if (fatal_signal_pending(current)) + return; + cond_resched(); } - up_read(&gmap->mm->mmap_sem); } /* Section: vm related */ @@ -1414,8 +1415,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_PFAULT; if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; - if (test_kvm_facility(vcpu->kvm, 129)) + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. + */ + if (MACHINE_HAS_VX) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1430,10 +1436,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; - /* Depending on MACHINE_HAS_VX, data stored to vrs either - * has vector register or floating point register format. - */ - current->thread.fpu.regs = vcpu->run->s.regs.vrs; + if (MACHINE_HAS_VX) + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + else + current->thread.fpu.regs = vcpu->run->s.regs.fprs; current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ @@ -2158,8 +2164,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - u8 opcode; + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_ADDRESSING, + }; + u8 opcode, ilen; int rc; VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); @@ -2173,12 +2181,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. */ - rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - psw->addr = __rewind_psw(*psw, -insn_length(opcode)); - - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = read_guest_instr(vcpu, &opcode, 1); + ilen = insn_length(opcode); + if (rc < 0) { + return rc; + } else if (rc) { + /* Instruction-Fetching Exceptions - we can't detect the ilen. + * Forward by arbitrary ilc, injection will take care of + * nullification if necessary. + */ + pgm_info = vcpu->arch.pgm; + ilen = 4; + } + pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; + kvm_s390_forward_psw(vcpu, ilen); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -2386,7 +2403,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) fprs, 128); } else { rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, - vcpu->run->s.regs.vrs, 128); + vcpu->run->s.regs.fprs, 128); } rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); @@ -2605,7 +2622,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_FETCH); break; } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); @@ -2616,7 +2634,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_STORE); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index df1abada1f36..1c756c7dd0c2 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,6 +19,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/facility.h> +#include <asm/processor.h> typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); @@ -212,8 +213,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm, int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu); int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen); +} +static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) +{ + kvm_s390_rewind_psw(vcpu, -ilen); +} +static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) +{ + kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); +} /* implemented in priv.c */ int is_valid_psw(psw_t *psw); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed74e86d9b9e..add990945986 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -173,7 +173,7 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -184,7 +184,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu) if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -759,8 +759,8 @@ static int handle_essa(struct kvm_vcpu *vcpu) if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Rewind PSW to repeat the ESSA instruction */ - kvm_s390_rewind_psw(vcpu, 4); + /* Retry the ESSA instruction */ + kvm_s390_retry_instr(vcpu); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); @@ -981,11 +981,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only... */ cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, + GACC_FETCH); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 44adbb819041..7b5459982433 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -754,6 +754,8 @@ struct kvm_arch { bool irqchip_split; u8 nr_reserved_ioapic_pins; + + bool disabled_lapic_found; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 7956412d09bd..9b1a91834ac8 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -226,7 +226,9 @@ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) /* Declare the various hypercall operations. */ -#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_POST_MESSAGE 0x005c +#define HVCALL_SIGNAL_EVENT 0x005d #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index 9dc091acd5fb..308b8597c691 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -51,11 +51,9 @@ struct kvm_assigned_dev_kernel { static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { - struct list_head *ptr; struct kvm_assigned_dev_kernel *match; - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + list_for_each_entry(match, head, list) { if (match->assigned_dev_id == assigned_dev_id) return match; } @@ -373,14 +371,10 @@ static void kvm_free_assigned_device(struct kvm *kvm, void kvm_free_all_assigned_devices(struct kvm *kvm) { - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); + struct kvm_assigned_dev_kernel *assigned_dev, *tmp; + list_for_each_entry_safe(assigned_dev, tmp, + &kvm->arch.assigned_dev_head, list) { kvm_free_assigned_device(kvm, assigned_dev); } } diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c58ba67175ac..5ff3485acb60 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1043,6 +1043,27 @@ bool kvm_hv_hypercall_enabled(struct kvm *kvm) return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; } +static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) +{ + bool longmode; + + longmode = is_64_bit_mode(vcpu); + if (longmode) + kvm_register_write(vcpu, VCPU_REGS_RAX, result); + else { + kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); + kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); + } +} + +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); + return 1; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret; @@ -1055,7 +1076,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) */ if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); - return 0; + return 1; } longmode = is_64_bit_mode(vcpu); @@ -1083,22 +1104,33 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); + /* Hypercall continuation is not supported yet */ + if (rep_cnt || rep_idx) { + res = HV_STATUS_INVALID_HYPERCALL_CODE; + goto set_result; + } + switch (code) { - case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: + case HVCALL_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; + case HVCALL_POST_MESSAGE: + case HVCALL_SIGNAL_EVENT: + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; + vcpu->run->hyperv.u.hcall.input = param; + vcpu->run->hyperv.u.hcall.params[0] = ingpa; + vcpu->run->hyperv.u.hcall.params[1] = outgpa; + vcpu->arch.complete_userspace_io = + kvm_hv_hypercall_complete_userspace; + return 0; default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; } +set_result: ret = res | (((u64)rep_done & 0xfff) << 32); - if (longmode) { - kvm_register_write(vcpu, VCPU_REGS_RAX, ret); - } else { - kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); - kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); - } - + kvm_hv_hypercall_set_result(vcpu, ret); return 1; } diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 3982b479bb5f..95fcc7b13866 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -33,7 +33,10 @@ */ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return apic_has_pending_timer(vcpu); + if (lapic_in_kernel(vcpu)) + return apic_has_pending_timer(vcpu); + + return 0; } EXPORT_SYMBOL(kvm_cpu_has_pending_timer); @@ -137,8 +140,8 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { - kvm_inject_apic_timer_irqs(vcpu); - /* TODO: PIT, RTC etc. */ + if (lapic_in_kernel(vcpu)) + kvm_inject_apic_timer_irqs(vcpu); } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ae5c78f2337d..61ebdc13a29a 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -109,14 +109,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return ret; } -static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) -{ - /* Same as irqchip_in_kernel(vcpu->kvm), but with less - * pointer chasing and no unnecessary memory barriers. - */ - return vcpu->arch.apic != NULL; -} - void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8fc89efb5250..37217363887d 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -34,6 +34,7 @@ #include "lapic.h" #include "hyperv.h" +#include "x86.h" static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, @@ -57,6 +58,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, { int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; + unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + unsigned int dest_vcpus = 0; if (irq->dest_mode == 0 && irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { @@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) return r; + memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap)); + kvm_for_each_vcpu(i, vcpu, kvm) { if (!kvm_apic_present(vcpu)) continue; @@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, r = 0; r += kvm_apic_set_irq(vcpu, irq, dest_map); } else if (kvm_lapic_enabled(vcpu)) { - if (!lowest) - lowest = vcpu; - else if (kvm_apic_compare_prio(vcpu, lowest) < 0) - lowest = vcpu; + if (!kvm_vector_hashing_enabled()) { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; + } else { + __set_bit(i, dest_vcpu_bitmap); + dest_vcpus++; + } } } + if (dest_vcpus != 0) { + int idx = kvm_vector_to_index(irq->vector, dest_vcpus, + dest_vcpu_bitmap, KVM_MAX_VCPUS); + + lowest = kvm_get_vcpu(kvm, idx); + } + if (lowest) r = kvm_apic_set_irq(lowest, irq, dest_map); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36591faed13b..cf74404230ca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -281,7 +281,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *feat; u32 v = APIC_VERSION; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); @@ -475,18 +475,12 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { - int highest_irr; - /* This may race with setting of irr in __apic_accept_irq() and * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq * will cause vmexit immediately and the value will be recalculated * on the next vmentry. */ - if (!kvm_vcpu_has_lapic(vcpu)) - return 0; - highest_irr = apic_find_highest_irr(vcpu->arch.apic); - - return highest_irr; + return apic_find_highest_irr(vcpu->arch.apic); } static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, @@ -675,6 +669,31 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, } } +int kvm_vector_to_index(u32 vector, u32 dest_vcpus, + const unsigned long *bitmap, u32 bitmap_size) +{ + u32 mod; + int i, idx = -1; + + mod = vector % dest_vcpus; + + for (i = 0; i <= mod; i++) { + idx = find_next_bit(bitmap, bitmap_size, idx + 1); + BUG_ON(idx == bitmap_size); + } + + return idx; +} + +static void kvm_apic_disabled_lapic_found(struct kvm *kvm) +{ + if (!kvm->arch.disabled_lapic_found) { + kvm->arch.disabled_lapic_found = true; + printk(KERN_INFO + "Disabled LAPIC found during irq injection\n"); + } +} + bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) { @@ -727,21 +746,42 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = map->logical_map[cid]; - if (kvm_lowest_prio_delivery(irq)) { + if (!kvm_lowest_prio_delivery(irq)) + goto set_irq; + + if (!kvm_vector_hashing_enabled()) { int l = -1; for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; if (l < 0) l = i; - else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) + else if (kvm_apic_compare_prio(dst[i]->vcpu, + dst[l]->vcpu) < 0) l = i; } - bitmap = (l >= 0) ? 1 << l : 0; + } else { + int idx; + unsigned int dest_vcpus; + + dest_vcpus = hweight16(bitmap); + if (dest_vcpus == 0) + goto out; + + idx = kvm_vector_to_index(irq->vector, + dest_vcpus, &bitmap, 16); + + if (!dst[idx]) { + kvm_apic_disabled_lapic_found(kvm); + goto out; + } + + bitmap = (idx >= 0) ? 1 << idx : 0; } } +set_irq: for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; @@ -754,6 +794,20 @@ out: return ret; } +/* + * This routine tries to handler interrupts in posted mode, here is how + * it deals with different cases: + * - For single-destination interrupts, handle it in posted mode + * - Else if vector hashing is enabled and it is a lowest-priority + * interrupt, handle it in posted mode and use the following mechanism + * to find the destinaiton vCPU. + * 1. For lowest-priority interrupts, store all the possible + * destination vCPUs in an array. + * 2. Use "guest vector % max number of destination vCPUs" to find + * the right destination vCPU in the array for the lowest-priority + * interrupt. + * - Otherwise, use remapped mode to inject the interrupt. + */ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu) { @@ -795,16 +849,37 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, if (cid >= ARRAY_SIZE(map->logical_map)) goto out; - for_each_set_bit(i, &bitmap, 16) { - dst = map->logical_map[cid][i]; - if (++r == 2) + if (kvm_vector_hashing_enabled() && + kvm_lowest_prio_delivery(irq)) { + int idx; + unsigned int dest_vcpus; + + dest_vcpus = hweight16(bitmap); + if (dest_vcpus == 0) goto out; - } - if (dst && kvm_apic_present(dst->vcpu)) + idx = kvm_vector_to_index(irq->vector, dest_vcpus, + &bitmap, 16); + + dst = map->logical_map[cid][idx]; + if (!dst) { + kvm_apic_disabled_lapic_found(kvm); + goto out; + } + *dest_vcpu = dst->vcpu; - else - goto out; + } else { + for_each_set_bit(i, &bitmap, 16) { + dst = map->logical_map[cid][i]; + if (++r == 2) + goto out; + } + + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } } ret = true; @@ -1239,7 +1314,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u64 guest_tsc, tsc_deadline; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; if (apic->lapic_timer.expired_tscdeadline == 0) @@ -1515,8 +1590,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_has_lapic(vcpu)) - apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); + apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); @@ -1566,7 +1640,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return 0; @@ -1577,7 +1651,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) return; @@ -1590,9 +1664,6 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu)) - return; - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); } @@ -1601,9 +1672,6 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { u64 tpr; - if (!kvm_vcpu_has_lapic(vcpu)) - return 0; - tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); return (tpr & 0xf0) >> 4; @@ -1728,8 +1796,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && - apic_lvt_enabled(apic, APIC_LVTT)) + if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT)) return atomic_read(&apic->lapic_timer.pending); return 0; @@ -1826,7 +1893,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; - if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) + if (!apic_enabled(apic)) return -1; apic_update_ppr(apic); @@ -1854,9 +1921,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu)) - return; - if (atomic_read(&apic->lapic_timer.pending) > 0) { kvm_apic_local_deliver(apic, APIC_LVTT); if (apic_lvtt_tscdeadline(apic)) @@ -1932,7 +1996,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { struct hrtimer *timer; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return; timer = &vcpu->arch.apic->lapic_timer.timer; @@ -2105,7 +2169,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return 1; /* if this is ICR write vector before command */ @@ -2119,7 +2183,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 low, high = 0; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!lapic_in_kernel(vcpu)) return 1; if (apic_reg_read(apic, reg, 4, &low)) @@ -2151,7 +2215,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) u8 sipi_vector; unsigned long pe; - if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) + if (!lapic_in_kernel(vcpu) || !apic->pending_events) return; /* diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 41bdb35b4b67..59610099af04 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -103,7 +103,7 @@ static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) extern struct static_key kvm_no_apic_vcpu; -static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) +static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) { if (static_key_false(&kvm_no_apic_vcpu)) return vcpu->arch.apic; @@ -130,7 +130,7 @@ static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic) static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); + return lapic_in_kernel(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); } static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) @@ -150,7 +150,7 @@ static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && vcpu->arch.apic->pending_events; + return lapic_in_kernel(vcpu) && vcpu->arch.apic->pending_events; } static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) @@ -161,7 +161,7 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) { - return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } static inline int kvm_apic_id(struct kvm_lapic *apic) @@ -175,4 +175,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); +int kvm_vector_to_index(u32 vector, u32 dest_vcpus, + const unsigned long *bitmap, u32 bitmap_size); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95a955de5964..07f4c26a10d3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2354,8 +2354,8 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, if (list_empty(&kvm->arch.active_mmu_pages)) return false; - sp = list_entry(kvm->arch.active_mmu_pages.prev, - struct kvm_mmu_page, link); + sp = list_last_entry(&kvm->arch.active_mmu_pages, + struct kvm_mmu_page, link); kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); return true; @@ -3273,7 +3273,7 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); } -static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) { if (direct) return vcpu_match_mmio_gpa(vcpu, addr); @@ -3332,7 +3332,7 @@ int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) u64 spte; bool reserved; - if (quickly_check_mmio_pf(vcpu, addr, direct)) + if (mmio_info_in_cache(vcpu, addr, direct)) return RET_MMIO_PF_EMULATE; reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); @@ -3370,13 +3370,6 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gva, true); - - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - } - r = mmu_topup_memory_caches(vcpu); if (r) return r; @@ -3460,13 +3453,6 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gpa, true); - - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - } - r = mmu_topup_memory_caches(vcpu); if (r) return r; @@ -4354,32 +4340,34 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); } -static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr) -{ - if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu)) - return vcpu_match_mmio_gpa(vcpu, addr); - - return vcpu_match_mmio_gva(vcpu, addr); -} - int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, void *insn, int insn_len) { int r, emulation_type = EMULTYPE_RETRY; enum emulation_result er; + bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); + + if (unlikely(error_code & PFERR_RSVD_MASK)) { + r = handle_mmio_page_fault(vcpu, cr2, direct); + if (r == RET_MMIO_PF_EMULATE) { + emulation_type = 0; + goto emulate; + } + if (r == RET_MMIO_PF_RETRY) + return 1; + if (r < 0) + return r; + } r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false); if (r < 0) - goto out; - - if (!r) { - r = 1; - goto out; - } + return r; + if (!r) + return 1; - if (is_mmio_page_fault(vcpu, cr2)) + if (mmio_info_in_cache(vcpu, cr2, direct)) emulation_type = 0; - +emulate: er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); switch (er) { @@ -4393,8 +4381,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code, default: BUG(); } -out: - return r; } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c9fed957cce..05827ff7bd2e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -702,24 +702,17 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); - if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); - if (likely(r != RET_MMIO_PF_INVALID)) - return r; - - /* - * page fault with PFEC.RSVD = 1 is caused by shadow - * page fault, should not be used to walk guest page - * table. - */ - error_code &= ~PFERR_RSVD_MASK; - }; - r = mmu_topup_memory_caches(vcpu); if (r) return r; /* + * If PFEC.RSVD is set, this is a shadow page fault. + * The bit needs to be cleared before walking guest page tables. + */ + error_code &= ~PFERR_RSVD_MASK; + + /* * Look up the guest pte for the faulting address. */ r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 31aa2c85dc97..06ce377dcbc9 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -257,7 +257,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) { - if (vcpu->arch.apic) + if (lapic_in_kernel(vcpu)) kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c13a64b7d789..95070386d599 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1858,8 +1858,7 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - kvm_emulate_hypercall(&svm->vcpu); - return 1; + return kvm_emulate_hypercall(&svm->vcpu); } static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ad9f6a23f139..2f1ea2f61e1f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -996,11 +996,13 @@ TRACE_EVENT(kvm_enter_smm, * Tracepoint for VT-d posted-interrupts. */ TRACE_EVENT(kvm_pi_irte_update, - TP_PROTO(unsigned int vcpu_id, unsigned int gsi, - unsigned int gvec, u64 pi_desc_addr, bool set), - TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), + TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, + unsigned int gsi, unsigned int gvec, + u64 pi_desc_addr, bool set), + TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set), TP_STRUCT__entry( + __field( unsigned int, host_irq ) __field( unsigned int, vcpu_id ) __field( unsigned int, gsi ) __field( unsigned int, gvec ) @@ -1009,6 +1011,7 @@ TRACE_EVENT(kvm_pi_irte_update, ), TP_fast_assign( + __entry->host_irq = host_irq; __entry->vcpu_id = vcpu_id; __entry->gsi = gsi; __entry->gvec = gvec; @@ -1016,9 +1019,10 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->set = set; ), - TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " + TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, " "gvec: 0x%x, pi_desc_addr: 0x%llx", __entry->set ? "enabled and being updated" : "disabled", + __entry->host_irq, __entry->vcpu_id, __entry->gsi, __entry->gvec, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2951b6edbbc..aa16d5874fe6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -961,25 +961,36 @@ static const u32 vmx_msr_index[] = { MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; -static inline bool is_page_fault(u32 intr_info) +static inline bool is_exception_n(u32 intr_info, u8 vector) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); + (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); +} + +static inline bool is_debug(u32 intr_info) +{ + return is_exception_n(intr_info, DB_VECTOR); +} + +static inline bool is_breakpoint(u32 intr_info) +{ + return is_exception_n(intr_info, BP_VECTOR); +} + +static inline bool is_page_fault(u32 intr_info) +{ + return is_exception_n(intr_info, PF_VECTOR); } static inline bool is_no_device(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, NM_VECTOR); } static inline bool is_invalid_opcode(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, UD_VECTOR); } static inline bool is_external_interrupt(u32 intr_info) @@ -5747,8 +5758,7 @@ static int handle_halt(struct kvm_vcpu *vcpu) static int handle_vmcall(struct kvm_vcpu *vcpu) { - kvm_emulate_hypercall(vcpu); - return 1; + return kvm_emulate_hypercall(vcpu); } static int handle_invd(struct kvm_vcpu *vcpu) @@ -6435,8 +6445,8 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { /* Recycle the least recently used VMCS. */ - item = list_entry(vmx->nested.vmcs02_pool.prev, - struct vmcs02_list, list); + item = list_last_entry(&vmx->nested.vmcs02_pool, + struct vmcs02_list, list); item->vmptr = vmx->nested.current_vmptr; list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; @@ -7752,6 +7762,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) return false; + else if (is_debug(intr_info) && + vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return false; + else if (is_breakpoint(intr_info) && + vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + return false; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: @@ -10764,13 +10781,26 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, */ kvm_set_msi_irq(e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + /* + * Make sure the IRTE is in remapped mode if + * we don't handle it in posted mode. + */ + ret = irq_set_vcpu_affinity(host_irq, NULL); + if (ret < 0) { + printk(KERN_INFO + "failed to back to remapped mode, irq: %u\n", + host_irq); + goto out; + } + continue; + } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; - trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, + trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); if (set) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4244c2baf57d..2fb92c0af803 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +static bool __read_mostly vector_hashing = true; +module_param(vector_hashing, bool, S_IRUGO); + static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -1196,17 +1199,11 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) static uint32_t div_frac(uint32_t dividend, uint32_t divisor) { - uint32_t quotient, remainder; - - /* Don't try to replace with do_div(), this one calculates - * "(dividend << 32) / divisor" */ - __asm__ ( "divl %4" - : "=a" (quotient), "=d" (remainder) - : "0" (0), "1" (dividend), "r" (divisor) ); - return quotient; + do_shl32_div32(dividend, divisor); + return dividend; } -static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, +static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz, s8 *pshift, u32 *pmultiplier) { uint64_t scaled64; @@ -1214,8 +1211,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, uint64_t tps64; uint32_t tps32; - tps64 = base_khz * 1000LL; - scaled64 = scaled_khz * 1000LL; + tps64 = base_hz; + scaled64 = scaled_hz; while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { tps64 >>= 1; shift--; @@ -1233,8 +1230,8 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, *pshift = shift; *pmultiplier = div_frac(scaled64, tps32); - pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", - __func__, base_khz, scaled_khz, shift, *pmultiplier); + pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n", + __func__, base_hz, scaled_hz, shift, *pmultiplier); } #ifdef CONFIG_X86_64 @@ -1293,23 +1290,23 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) return 0; } -static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) { + if (user_tsc_khz == 0) { /* set tsc_scaling_ratio to a safe value */ vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; return -1; } /* Compute a scale to convert nanoseconds in TSC cycles */ - kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, + kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, &vcpu->arch.virtual_tsc_shift, &vcpu->arch.virtual_tsc_mult); - vcpu->arch.virtual_tsc_khz = this_tsc_khz; + vcpu->arch.virtual_tsc_khz = user_tsc_khz; /* * Compute the variation in TSC rate which is acceptable @@ -1319,11 +1316,11 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) */ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); - if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { - pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); + if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { + pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1716,7 +1713,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz, tgt_tsc_khz; + unsigned long flags, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1742,8 +1739,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - this_tsc_khz = __this_cpu_read(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { + tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); + if (unlikely(tgt_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; @@ -1778,13 +1775,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) if (!vcpu->pv_time_enabled) return 0; - if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - tgt_tsc_khz = kvm_has_tsc_control ? - vcpu->virtual_tsc_khz : this_tsc_khz; - kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, + if (kvm_has_tsc_control) + tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz); + + if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); - vcpu->hw_tsc_khz = this_tsc_khz; + vcpu->hw_tsc_khz = tgt_tsc_khz; } /* With all the info we got, fill in the values */ @@ -2752,6 +2750,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -2987,7 +2986,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && - kvm_vcpu_has_lapic(vcpu)) + lapic_in_kernel(vcpu)) vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { @@ -3000,7 +2999,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; else vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; - if (kvm_vcpu_has_lapic(vcpu)) { + if (lapic_in_kernel(vcpu)) { if (events->smi.latched_init) set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); else @@ -3240,7 +3239,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_GET_LAPIC: { r = -EINVAL; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) goto out; u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); @@ -3258,7 +3257,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_LAPIC: { r = -EINVAL; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) goto out; u.lapic = memdup_user(argp, sizeof(*u.lapic)); if (IS_ERR(u.lapic)) @@ -4093,7 +4092,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, do { n = min(len, 8); - if (!(vcpu->arch.apic && + if (!(lapic_in_kernel(vcpu) && !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) break; @@ -4113,7 +4112,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) do { n = min(len, 8); - if (!(vcpu->arch.apic && + if (!(lapic_in_kernel(vcpu) && !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) @@ -6010,7 +6009,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) if (!kvm_x86_ops->update_cr8_intercept) return; - if (!vcpu->arch.apic) + if (!lapic_in_kernel(vcpu)) return; if (vcpu->arch.apicv_active) @@ -7038,7 +7037,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (!kvm_vcpu_has_lapic(vcpu) && + if (!lapic_in_kernel(vcpu) && mp_state->mp_state != KVM_MP_STATE_RUNNABLE) return -EINVAL; @@ -7593,6 +7592,7 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) } struct static_key kvm_no_apic_vcpu __read_mostly; +EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { @@ -8370,6 +8370,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); } +bool kvm_vector_hashing_enabled(void) +{ + return vector_hashing; +} +EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f2afa5fe48a6..007940faa5c6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); +bool kvm_vector_hashing_enabled(void); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ @@ -192,4 +193,19 @@ extern unsigned int min_timer_period_us; extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; + +/* Same "calling convention" as do_div: + * - divide (n << 32) by base + * - put result in n + * - return remainder + */ +#define do_shl32_div32(n, base) \ + ({ \ + u32 __quot, __rem; \ + asm("divl %2" : "=a" (__quot), "=d" (__rem) \ + : "rm" (base), "0" (0), "1" ((u32) n)); \ + n = __quot; \ + __rem; \ + }) + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 4ebc796b4f33..2f8c0f40930b 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -256,12 +256,6 @@ struct hv_monitor_page { u8 rsvdz4[1984]; }; -/* Declare the various hypercall operations. */ -enum hv_call_code { - HVCALL_POST_MESSAGE = 0x005c, - HVCALL_SIGNAL_EVENT = 0x005d, -}; - /* Definition of the hv_post_message hypercall input structure. */ struct hv_input_post_message { union hv_connection_id connectionid; diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index d6f83222a6a1..aa69253ecc7d 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -359,14 +359,15 @@ TRACE_EVENT( #endif TRACE_EVENT(kvm_halt_poll_ns, - TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), + TP_PROTO(bool grow, unsigned int vcpu_id, unsigned int new, + unsigned int old), TP_ARGS(grow, vcpu_id, new, old), TP_STRUCT__entry( __field(bool, grow) __field(unsigned int, vcpu_id) - __field(int, new) - __field(int, old) + __field(unsigned int, new) + __field(unsigned int, old) ), TP_fast_assign( @@ -376,7 +377,7 @@ TRACE_EVENT(kvm_halt_poll_ns, __entry->old = old; ), - TP_printk("vcpu %u: halt_poll_ns %d (%s %d)", + TP_printk("vcpu %u: halt_poll_ns %u (%s %u)", __entry->vcpu_id, __entry->new, __entry->grow ? "grow" : "shrink", diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b06208b2669c..c251f06f4e11 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -157,6 +157,7 @@ struct kvm_s390_skeys { struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 +#define KVM_EXIT_HYPERV_HCALL 2 __u32 type; union { struct { @@ -165,6 +166,11 @@ struct kvm_hyperv_exit { __u64 evt_page; __u64 msg_page; } synic; + struct { + __u64 input; + __u64 result; + __u64 params[2]; + } hcall; } u; }; @@ -541,7 +547,13 @@ struct kvm_s390_pgm_info { __u8 exc_access_id; __u8 per_access_id; __u8 op_access_id; - __u8 pad[3]; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; }; struct kvm_s390_prefix_info { diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 353159922456..c7e447c4296e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -109,8 +109,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = - list_entry(vcpu->async_pf.queue.next, - typeof(*work), queue); + list_first_entry(&vcpu->async_pf.queue, + typeof(*work), queue); list_del(&work->queue); #ifdef CONFIG_KVM_ASYNC_PF_SYNC @@ -127,8 +127,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = - list_entry(vcpu->async_pf.done.next, - typeof(*work), link); + list_first_entry(&vcpu->async_pf.done, + typeof(*work), link); list_del(&work->link); kmem_cache_free(async_pf_cache, work); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a11cfd20a6a0..39c36d4f4f5c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -72,11 +72,11 @@ module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* Default doubles per-vcpu halt_poll_ns. */ static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, int, S_IRUGO); +module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); /* Default resets per-vcpu halt_poll_ns . */ static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, int, S_IRUGO); +module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); /* * Ordering of locks: @@ -620,13 +620,10 @@ void *kvm_kvzalloc(unsigned long size) static void kvm_destroy_devices(struct kvm *kvm) { - struct list_head *node, *tmp; + struct kvm_device *dev, *tmp; - list_for_each_safe(node, tmp, &kvm->devices) { - struct kvm_device *dev = - list_entry(node, struct kvm_device, vm_node); - - list_del(node); + list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) { + list_del(&dev->vm_node); dev->ops->destroy(dev); } } @@ -1943,14 +1940,15 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty); static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) { - int old, val; + unsigned int old, val, grow; old = val = vcpu->halt_poll_ns; + grow = READ_ONCE(halt_poll_ns_grow); /* 10us base */ - if (val == 0 && halt_poll_ns_grow) + if (val == 0 && grow) val = 10000; else - val *= halt_poll_ns_grow; + val *= grow; vcpu->halt_poll_ns = val; trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); @@ -1958,13 +1956,14 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { - int old, val; + unsigned int old, val, shrink; old = val = vcpu->halt_poll_ns; - if (halt_poll_ns_shrink == 0) + shrink = READ_ONCE(halt_poll_ns_shrink); + if (shrink == 0) val = 0; else - val /= halt_poll_ns_shrink; + val /= shrink; vcpu->halt_poll_ns = val; trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); |