diff options
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 19 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 21 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 21 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 28 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 2 | ||||
-rwxr-xr-x | tools/kvm/kvm_stat/kvm_stat | 59 |
9 files changed, 117 insertions, 60 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3c0e8fb2b773..68e14afecac8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long pp, key; unsigned long v, orig_v, gr; __be64 *hptep; - int index; + long int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); if (kvm_is_radix(vcpu->kvm)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0af1c0aea1fe..fd6e8c13685f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -725,10 +725,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { - unsigned long npages = 1; + unsigned long psize = PAGE_SIZE; if (shift) - npages = 1ul << (shift - PAGE_SHIFT); - kvmppc_update_dirty_map(memslot, gfn, npages); + psize = 1ul << shift; + kvmppc_update_dirty_map(memslot, gfn, psize); } } return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 00ddb0c9e612..e12916e7c2fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1237,19 +1237,12 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_RETRY (1 << 3) -#define EMULTYPE_NO_REEXECUTE (1 << 4) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 5) -#define EMULTYPE_VMWARE (1 << 6) -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - int emulation_type, void *insn, int insn_len); - -static inline int emulate_instruction(struct kvm_vcpu *vcpu, - int emulation_type) -{ - return x86_emulate_instruction(vcpu, 0, - emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); -} +#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) +#define EMULTYPE_VMWARE (1 << 5) +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a282321329b5..f7e83b1e0eb2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { - int r, emulation_type = EMULTYPE_RETRY; + int r, emulation_type = 0; enum emulation_result er; bool direct = vcpu->arch.mmu.direct_map; @@ -5230,10 +5230,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2, direct); - if (r == RET_PF_EMULATE) { - emulation_type = 0; + if (r == RET_PF_EMULATE) goto emulate; - } } if (r == RET_PF_INVALID) { @@ -5260,8 +5258,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; } - if (mmio_info_in_cache(vcpu, cr2, direct)) - emulation_type = 0; + /* + * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still + * optimistically try to just unprotect the page and let the processor + * re-execute the instruction that caused the page fault. Do not allow + * retrying MMIO emulation, as it's not only pointless but could also + * cause us to enter an infinite loop because the processor will keep + * faulting on the non-existent MMIO address. Retrying an instruction + * from a nested guest is also pointless and dangerous as we are only + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. + */ + if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* * On AMD platforms, under certain conditions insn_len may be zero on #NPF. diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6276140044d0..89c4c5aa15f1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) } if (!svm->next_rip) { - if (emulate_instruction(vcpu, EMULTYPE_SKIP) != + if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm) WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } static int rsm_interception(struct vcpu_svm *svm) { - return x86_emulate_instruction(&svm->vcpu, 0, 0, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, + rsm_ins_bytes, 2) == EMULATE_DONE; } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); } return ret; @@ -6747,7 +6747,7 @@ e_free: static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) { unsigned long vaddr, vaddr_end, next_vaddr; - unsigned long dst_vaddr, dst_vaddr_end; + unsigned long dst_vaddr; struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; @@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) size = debug.len; vaddr_end = vaddr + size; dst_vaddr = debug.dst_uaddr; - dst_vaddr_end = dst_vaddr + size; for (; vaddr < vaddr_end; vaddr = next_vaddr) { int len, s_off, d_off; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d26f3c4985b..f910d33858d9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_cr(struct kvm_vcpu *vcpu) @@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, - NULL, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == + EMULATE_DONE; } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, 0); + err = kvm_emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; @@ -13988,9 +13988,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) return -EINVAL; - if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING) - vmx->nested.nested_run_pending = 1; - vmx->nested.dirty_vmcs12 = true; ret = enter_vmx_non_root_mode(vcpu, NULL); if (ret) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 506bd2b4b8bb..542f6315444d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = 0; } - er = emulate_instruction(vcpu, emul_type); + er = kvm_emulate_instruction(vcpu, emul_type); if (er == EMULATE_USER_EXIT) return 0; if (er != EMULATE_DONE) @@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, gpa_t gpa = cr2; kvm_pfn_t pfn; - if (emulation_type & EMULTYPE_NO_REEXECUTE) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) + return false; + + if (WARN_ON_ONCE(is_guest_mode(vcpu))) return false; if (!vcpu->arch.mmu.direct_map) { @@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, */ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; - if (!(emulation_type & EMULTYPE_RETRY)) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) + return false; + + if (WARN_ON_ONCE(is_guest_mode(vcpu))) return false; if (x86_page_table_writing_insn(ctxt)) @@ -6276,7 +6282,19 @@ restart: return r; } -EXPORT_SYMBOL_GPL(x86_emulate_instruction); + +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction); + +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len) +{ + return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) @@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r != EMULATE_DONE) return 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 257f27620bc2..67b9568613f3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 56c4b3f8a01b..439b8a27488d 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -759,12 +759,18 @@ class DebugfsProvider(Provider): if len(vms) == 0: self.do_read = False - self.paths = filter(lambda x: "{}-".format(pid) in x, vms) + self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms)) else: self.paths = [] self.do_read = True - self.reset() + + def _verify_paths(self): + """Remove invalid paths""" + for path in self.paths: + if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)): + self.paths.remove(path) + continue def read(self, reset=0, by_guest=0): """Returns a dict with format:'file name / field -> current value'. @@ -780,6 +786,7 @@ class DebugfsProvider(Provider): # If no debugfs filtering support is available, then don't read. if not self.do_read: return results + self._verify_paths() paths = self.paths if self._pid == 0: @@ -1096,15 +1103,16 @@ class Tui(object): pid = self.stats.pid_filter self.screen.erase() gname = self.get_gname_from_pid(pid) + self._gname = gname if gname: gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...' if len(gname) > MAX_GUEST_NAME_LEN else gname)) if pid > 0: - self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}' - .format(pid, gname), curses.A_BOLD) + self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname) else: - self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + self._headline = 'kvm statistics - summary' + self.screen.addstr(0, 0, self._headline, curses.A_BOLD) if self.stats.fields_filter: regex = self.stats.fields_filter if len(regex) > MAX_REGEX_LEN: @@ -1162,6 +1170,19 @@ class Tui(object): return sorted_items + if not self._is_running_guest(self.stats.pid_filter): + if self._gname: + try: # ...to identify the guest by name in case it's back + pids = self.get_pid_from_gname(self._gname) + if len(pids) == 1: + self._refresh_header(pids[0]) + self._update_pid(pids[0]) + return + except: + pass + self._display_guest_dead() + # leave final data on screen + return row = 3 self.screen.move(row, 0) self.screen.clrtobot() @@ -1184,6 +1205,7 @@ class Tui(object): # print events tavg = 0 tcur = 0 + guest_removed = False for key, values in get_sorted_events(self, stats): if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0): break @@ -1191,7 +1213,10 @@ class Tui(object): key = self.get_gname_from_pid(key) if not key: continue - cur = int(round(values.delta / sleeptime)) if values.delta else '' + cur = int(round(values.delta / sleeptime)) if values.delta else 0 + if cur < 0: + guest_removed = True + continue if key[0] != ' ': if values.delta: tcur += values.delta @@ -1204,13 +1229,21 @@ class Tui(object): values.value * 100 / float(ltotal), cur)) row += 1 if row == 3: - self.screen.addstr(4, 1, 'No matching events reported yet') + if guest_removed: + self.screen.addstr(4, 1, 'Guest removed, updating...') + else: + self.screen.addstr(4, 1, 'No matching events reported yet') if row > 4: tavg = int(round(tcur / sleeptime)) if tcur > 0 else '' self.screen.addstr(row, 1, '%-40s %10d %8s' % ('Total', total, tavg), curses.A_BOLD) self.screen.refresh() + def _display_guest_dead(self): + marker = ' Guest is DEAD ' + y = min(len(self._headline), 80 - len(marker)) + self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT) + def _show_msg(self, text): """Display message centered text and exit on key press""" hint = 'Press any key to continue' @@ -1219,10 +1252,10 @@ class Tui(object): (x, term_width) = self.screen.getmaxyx() row = 2 for line in text: - start = (term_width - len(line)) / 2 + start = (term_width - len(line)) // 2 self.screen.addstr(row, start, line) row += 1 - self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint, + self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint, curses.A_STANDOUT) self.screen.getkey() @@ -1319,6 +1352,12 @@ class Tui(object): msg = '"' + str(val) + '": Invalid value' self._refresh_header() + def _is_running_guest(self, pid): + """Check if pid is still a running process.""" + if not pid: + return True + return os.path.isdir(os.path.join('/proc/', str(pid))) + def _show_vm_selection_by_guest(self): """Draws guest selection mask. @@ -1346,7 +1385,7 @@ class Tui(object): if not guest or guest == '0': break if guest.isdigit(): - if not os.path.isdir(os.path.join('/proc/', guest)): + if not self._is_running_guest(guest): msg = '"' + guest + '": Not a running process' continue pid = int(guest) |