summary | refs | log | tree | commit | diff
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
author: Oliver Upton <oupton@google.com> 2021-09-16 21:15:35 +0300
committer: Paolo Bonzini <pbonzini@redhat.com> 2021-10-18 21:43:44 +0300
commit: c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d (patch)
tree: e7d06e5afc7e9eda6259256c18da24fe42c9a327 /arch/x86/kvm/x86.c
parent: 3d5e7a28b1ea2d603dea478e58e37ce75b9597ab (diff)
download: linux-c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d.tar.xz
KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK

Handling the migration of TSCs correctly is difficult, in part because
Linux does not provide userspace with the ability to retrieve a (TSC,
realtime) clock pair for a single instant in time. In lieu of a more
convenient facility, KVM can report similar information in the kvm_clock
structure.

Provide userspace with a host TSC & realtime pair iff the realtime clock
is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid
realtime value, advance the KVM clock by the amount of elapsed time. Do
not step the KVM clock backwards, though, as it is a monotonic
oscillator.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210916181538.968978-5-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- arch/x86/kvm/x86.c 47
1 file changed, 33 insertions, 14 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ea4f6ef2474..d3631d149187 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2787,6 +2787,7 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
+ data->flags = 0;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
@@ -2803,10 +2804,20 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
get_cpu();
if (__this_cpu_read(cpu_tsc_khz)) {
+#ifdef CONFIG_X86_64
+ struct timespec64 ts;
+
+ if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
+ data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
+ data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
+ } else
+#endif
+ data->host_tsc = rdtsc();
+
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
- data->clock = __pvclock_read_cycles(&hv_clock, rdtsc());
+ data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
} else {
data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
}
@@ -2818,12 +2829,6 @@ u64 get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_clock_data data;
- /*
- * Zero flags as it's accessed RMW, leave everything else uninitialized
- * as clock is always written and no other fields are consumed.
- */
- data.flags = 0;
-
get_kvmclock(kvm, &data);
return data.clock;
}
@@ -4050,7 +4055,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_SYNC_X86_VALID_FIELDS;
break;
case KVM_CAP_ADJUST_CLOCK:
- r = KVM_CLOCK_TSC_STABLE;
+ r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
@@ -5847,12 +5852,16 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
{
struct kvm_arch *ka = &kvm->arch;
struct kvm_clock_data data;
- u64 now_ns;
+ u64 now_raw_ns;
if (copy_from_user(&data, argp, sizeof(data)))
return -EFAULT;
- if (data.flags)
+ /*
+ * Only KVM_CLOCK_REALTIME is used, but allow passing the
+ * result of KVM_GET_CLOCK back to KVM_SET_CLOCK.
+ */
+ if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
return -EINVAL;
kvm_hv_invalidate_tsc_page(kvm);
@@ -5866,11 +5875,21 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
* is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'data.clock' is very small.
*/
- if (kvm->arch.use_master_clock)
- now_ns = ka->master_kernel_ns;
+ if (data.flags & KVM_CLOCK_REALTIME) {
+ u64 now_real_ns = ktime_get_real_ns();
+
+ /*
+ * Avoid stepping the kvmclock backwards.
+ */
+ if (now_real_ns > data.realtime)
+ data.clock += now_real_ns - data.realtime;
+ }
+
+ if (ka->use_master_clock)
+ now_raw_ns = ka->master_kernel_ns;
else
- now_ns = get_kvmclock_base_ns();
- ka->kvmclock_offset = data.clock - now_ns;
+ now_raw_ns = get_kvmclock_base_ns();
+ ka->kvmclock_offset = data.clock - now_raw_ns;
kvm_end_pvclock_update(kvm);
return 0;
}