diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-04 21:26:29 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-04 21:26:29 +0300 |
commit | 0b90c5637dfea8a08f87db5dd16000eb679013a3 (patch) | |
tree | 1e64c72282c411c563758a26d34cfe023a56668a /arch/x86 | |
parent | e4f1b8202fb59c56a3de7642d50326923670513f (diff) | |
parent | 284930a0146ade1ce0250a1d3bae7a675af4bb3b (diff) | |
download | linux-0b90c5637dfea8a08f87db5dd16000eb679013a3.tar.xz |
Merge tag 'hyperv-next-signed-20230902' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
Pull hyperv updates from Wei Liu:
- Support for SEV-SNP guests on Hyper-V (Tianyu Lan)
- Support for TDX guests on Hyper-V (Dexuan Cui)
- Use SBRM API in Hyper-V balloon driver (Mitchell Levy)
- Avoid dereferencing ACPI root object handle in VMBus driver (Maciej
Szmigiero)
- A few misecllaneous fixes (Jiapeng Chong, Nathan Chancellor, Saurabh
Sengar)
* tag 'hyperv-next-signed-20230902' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (24 commits)
x86/hyperv: Remove duplicate include
x86/hyperv: Move the code in ivm.c around to avoid unnecessary ifdef's
x86/hyperv: Remove hv_isolation_type_en_snp
x86/hyperv: Use TDX GHCI to access some MSRs in a TDX VM with the paravisor
Drivers: hv: vmbus: Bring the post_msg_page back for TDX VMs with the paravisor
x86/hyperv: Introduce a global variable hyperv_paravisor_present
Drivers: hv: vmbus: Support >64 VPs for a fully enlightened TDX/SNP VM
x86/hyperv: Fix serial console interrupts for fully enlightened TDX guests
Drivers: hv: vmbus: Support fully enlightened TDX guests
x86/hyperv: Support hypercalls for fully enlightened TDX guests
x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests
x86/hyperv: Fix undefined reference to isolation_type_en_snp without CONFIG_HYPERV
x86/hyperv: Add missing 'inline' to hv_snp_boot_ap() stub
hv: hyperv.h: Replace one-element array with flexible-array member
Drivers: hv: vmbus: Don't dereference ACPI root object handle
x86/hyperv: Add hyperv-specific handling for VMMCALL under SEV-ES
x86/hyperv: Add smp support for SEV-SNP guest
clocksource: hyper-v: Mark hyperv tsc page unencrypted in sev-snp enlightened guest
x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest
drivers: hv: Mark percpu hvcall input arg page unencrypted in SEV-SNP enlightened guest
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/hyperv/hv_apic.c | 15 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 105 | ||||
-rw-r--r-- | arch/x86/hyperv/ivm.c | 263 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 71 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 91 |
6 files changed, 516 insertions, 39 deletions
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 187e13b15e9a..97bfe5f0531f 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -175,8 +175,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; - if (!hv_hypercall_pg) - return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; @@ -229,9 +232,15 @@ static bool __send_ipi_one(int cpu, int vector) trace_hyperv_send_ipi_one(cpu, vector); - if (!hv_hypercall_pg || (vp == VP_INVAL)) + if (vp == VP_INVAL) return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 507d98331e7c..783ed339f341 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -19,6 +19,7 @@ #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/idtentry.h> +#include <asm/set_memory.h> #include <linux/kexec.h> #include <linux/version.h> #include <linux/vmalloc.h> @@ -52,7 +53,7 @@ static int hyperv_init_ghcb(void) void *ghcb_va; void **ghcb_base; - if (!hv_isolation_type_snp()) + if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) return 0; if (!hv_ghcb_pg) @@ -80,7 +81,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + struct hv_vp_assist_page **hvp; int ret; ret = hv_common_cpu_init(cpu); @@ -90,6 +91,7 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; + hvp = &hv_vp_assist_page[cpu]; if (hv_root_partition) { /* * For root partition we get the hypervisor provided VP assist @@ -107,8 +109,21 @@ static int hv_cpu_init(unsigned int cpu) * in hv_cpu_die(), otherwise a CPU may not be stopped in the * case of CPU offlining and the VM will hang. */ - if (!*hvp) + if (!*hvp) { *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + + /* + * Hyper-V should never specify a VM that is a Confidential + * VM and also running in the root partition. Root partition + * is blocked to run in Confidential VM. So only decrypt assist + * page in non-root partition here. + */ + if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) { + WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1)); + memset(*hvp, 0, PAGE_SIZE); + } + } + if (*hvp) msr.pfn = vmalloc_to_pfn(*hvp); @@ -379,6 +394,36 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +static u8 __init get_vtl(void) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; + struct hv_get_vp_registers_input *input; + struct hv_get_vp_registers_output *output; + unsigned long flags; + u64 ret; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = (struct hv_get_vp_registers_output *)input; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = 0; + input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; + + ret = hv_do_hypercall(control, input, output); + if (hv_result_success(ret)) { + ret = output->as64.low & HV_X64_VTL_MASK; + } else { + pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); + ret = 0; + } + + local_irq_restore(flags); + return ret; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -399,14 +444,24 @@ void __init hyperv_init(void) if (hv_common_init()) return; - hv_vp_assist_page = kcalloc(num_possible_cpus(), - sizeof(*hv_vp_assist_page), GFP_KERNEL); + /* + * The VP assist page is useless to a TDX guest: the only use we + * would have for it is lazy EOI, which can not be used with TDX. + */ + if (hv_isolation_type_tdx()) + hv_vp_assist_page = NULL; + else + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), + GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto common_free; + + if (!hv_isolation_type_tdx()) + goto common_free; } - if (hv_isolation_type_snp()) { + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. */ if (!hv_ghcb_negotiate_protocol()) hv_ghcb_terminate(SEV_TERM_SET_GEN, @@ -426,12 +481,32 @@ void __init hyperv_init(void) * Setup the hypercall page and enable hypercalls. * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page + * + * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: + * when the hypercall input is a page, such a VM must pass a decrypted + * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page + * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. + * + * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, + * which are handled by the paravisor and the VM must use an encrypted + * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and + * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and + * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: + * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). + * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. + * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; + * instead, hv_post_message() uses the post_msg_page, which is decrypted + * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + /* With the paravisor, the VM must also write the ID via GHCB/GHCI */ + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + + /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) + goto skip_hypercall_pg_init; hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, @@ -472,6 +547,7 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } +skip_hypercall_pg_init: /* * Some versions of Hyper-V that provide IBT in guest VMs have a bug * in that there's no ENDBR64 instruction at the entry to the @@ -527,11 +603,15 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); + /* Find the VTL */ + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) + ms_hyperv.vtl = get_vtl(); + return; clean_guest_os_id: wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); cpuhp_remove_state(cpuhp); free_ghcb_page: free_percpu(hv_ghcb_pg); @@ -552,7 +632,7 @@ void hyperv_cleanup(void) /* Reset our OS id */ wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); + hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); /* * Reset hypercall page reference before reset the page, @@ -615,6 +695,9 @@ bool hv_is_hyperv_initialized(void) if (x86_hyper_type != X86_HYPER_MS_HYPERV) return false; + /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) + return true; /* * Verify that earlier initialization succeeded by checking * that the hypercall page is setup diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 28be6df88063..8c6bf07f7d2b 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -18,6 +18,11 @@ #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/mtrr.h> +#include <asm/io_apic.h> +#include <asm/realmode.h> +#include <asm/e820/api.h> +#include <asm/desc.h> +#include <uapi/asm/vmx.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -56,8 +61,10 @@ union hv_ghcb { } hypercall; } __packed __aligned(HV_HYP_PAGE_SIZE); +/* Only used in an SNP VM with the paravisor */ static u16 hv_ghcb_version __ro_after_init; +/* Functions only used in an SNP VM with the paravisor go here. */ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) { union hv_ghcb *hv_ghcb; @@ -175,7 +182,7 @@ bool hv_ghcb_negotiate_protocol(void) return true; } -void hv_ghcb_msr_write(u64 msr, u64 value) +static void hv_ghcb_msr_write(u64 msr, u64 value) { union hv_ghcb *hv_ghcb; void **ghcb_base; @@ -203,9 +210,8 @@ void hv_ghcb_msr_write(u64 msr, u64 value) local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(hv_ghcb_msr_write); -void hv_ghcb_msr_read(u64 msr, u64 *value) +static void hv_ghcb_msr_read(u64 msr, u64 *value) { union hv_ghcb *hv_ghcb; void **ghcb_base; @@ -235,7 +241,217 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32); local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); + +/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */ +static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE); +static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa); + +/* Functions only used in an SNP VM without the paravisor go here. */ + +#define hv_populate_vmcb_seg(seg, gdtr_base) \ +do { \ + if (seg.selector) { \ + seg.base = 0; \ + seg.limit = HV_AP_SEGMENT_LIMIT; \ + seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \ + seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \ + } \ +} while (0) \ + +static int snp_set_vmsa(void *va, bool vmsa) +{ + u64 attrs; + + /* + * Running at VMPL0 allows the kernel to change the VMSA bit for a page + * using the RMPADJUST instruction. However, for the instruction to + * succeed it must target the permissions of a lesser privileged + * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST + * instruction in the AMD64 APM Volume 3). + */ + attrs = 1; + if (vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); +} + +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) +{ + int err; + + err = snp_set_vmsa(vmsa, false); + if (err) + pr_err("clear VMSA page failed (%u), leaking page\n", err); + else + free_page((unsigned long)vmsa); +} + +int hv_snp_boot_ap(int cpu, unsigned long start_ip) +{ + struct sev_es_save_area *vmsa = (struct sev_es_save_area *) + __get_free_page(GFP_KERNEL | __GFP_ZERO); + struct sev_es_save_area *cur_vmsa; + struct desc_ptr gdtr; + u64 ret, retry = 5; + struct hv_enable_vp_vtl *start_vp_input; + unsigned long flags; + + if (!vmsa) + return -ENOMEM; + + native_store_gdt(&gdtr); + + vmsa->gdtr.base = gdtr.address; + vmsa->gdtr.limit = gdtr.size; + + asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); + hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); + + asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); + hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); + + asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); + hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); + + asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); + hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); + + vmsa->efer = native_read_msr(MSR_EFER); + + asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4)); + asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3)); + asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0)); + + vmsa->xcr0 = 1; + vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT; + vmsa->rip = (u64)secondary_startup_64_no_verify; + vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE]; + + /* + * Set the SNP-specific fields for this VMSA: + * VMPL level + * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) + */ + vmsa->vmpl = 0; + vmsa->sev_features = sev_status >> 2; + + ret = snp_set_vmsa(vmsa, true); + if (!ret) { + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); + free_page((u64)vmsa); + return ret; + } + + local_irq_save(flags); + start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; + memset(start_vp_input, 0, sizeof(*start_vp_input)); + start_vp_input->partition_id = -1; + start_vp_input->vp_index = cpu; + start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; + *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; + + do { + ret = hv_do_hypercall(HVCALL_START_VP, + start_vp_input, NULL); + } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--); + + local_irq_restore(flags); + + if (!hv_result_success(ret)) { + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret); + snp_cleanup_vmsa(vmsa); + vmsa = NULL; + } + + cur_vmsa = per_cpu(hv_sev_vmsa, cpu); + /* Free up any previous VMSA page */ + if (cur_vmsa) + snp_cleanup_vmsa(cur_vmsa); + + /* Record the current VMSA page */ + per_cpu(hv_sev_vmsa, cpu) = vmsa; + + return ret; +} + +#else +static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} +static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#ifdef CONFIG_INTEL_TDX_GUEST +static void hv_tdx_msr_write(u64 msr, u64 val) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_MSR_WRITE, + .r12 = msr, + .r13 = val, + }; + + u64 ret = __tdx_hypercall(&args); + + WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret); +} + +static void hv_tdx_msr_read(u64 msr, u64 *val) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = EXIT_REASON_MSR_READ, + .r12 = msr, + }; + + u64 ret = __tdx_hypercall_ret(&args); + + if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret)) + *val = 0; + else + *val = args.r11; +} + +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) +{ + struct tdx_hypercall_args args = { }; + + args.r10 = control; + args.rdx = param1; + args.r8 = param2; + + (void)__tdx_hypercall_ret(&args); + + return args.r11; +} + +#else +static inline void hv_tdx_msr_write(u64 msr, u64 value) {} +static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} +#endif /* CONFIG_INTEL_TDX_GUEST */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_ivm_msr_write(u64 msr, u64 value) +{ + if (!ms_hyperv.paravisor_present) + return; + + if (hv_isolation_type_tdx()) + hv_tdx_msr_write(msr, value); + else if (hv_isolation_type_snp()) + hv_ghcb_msr_write(msr, value); +} + +void hv_ivm_msr_read(u64 msr, u64 *value) +{ + if (!ms_hyperv.paravisor_present) + return; + + if (hv_isolation_type_tdx()) + hv_tdx_msr_read(msr, value); + else if (hv_isolation_type_snp()) + hv_ghcb_msr_read(msr, value); +} /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. @@ -358,13 +574,34 @@ static bool hv_is_private_mmio(u64 addr) void __init hv_vtom_init(void) { + enum hv_isolation_type type = hv_get_isolation_type(); + + switch (type) { + case HV_ISOLATION_TYPE_VBS: + fallthrough; /* * By design, a VM using vTOM doesn't see the SEV setting, * so SEV initialization is bypassed and sev_status isn't set. * Set it here to indicate a vTOM VM. + * + * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is + * defined as 0ULL, to which we can't assigned a value. */ - sev_status = MSR_AMD64_SNP_VTOM; - cc_vendor = CC_VENDOR_AMD; +#ifdef CONFIG_AMD_MEM_ENCRYPT + case HV_ISOLATION_TYPE_SNP: + sev_status = MSR_AMD64_SNP_VTOM; + cc_vendor = CC_VENDOR_AMD; + break; +#endif + + case HV_ISOLATION_TYPE_TDX: + cc_vendor = CC_VENDOR_INTEL; + break; + + default: + panic("hv_vtom_init: unsupported isolation type %d\n", type); + } + cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -377,7 +614,7 @@ void __init hv_vtom_init(void) mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } -#endif /* CONFIG_AMD_MEM_ENCRYPT */ +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ enum hv_isolation_type hv_get_isolation_type(void) { @@ -405,10 +642,20 @@ bool hv_is_isolation_supported(void) DEFINE_STATIC_KEY_FALSE(isolation_type_snp); /* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based * isolation VM. */ bool hv_isolation_type_snp(void) { return static_branch_unlikely(&isolation_type_snp); } + +DEFINE_STATIC_KEY_FALSE(isolation_type_tdx); +/* + * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based + * isolated VM. + */ +bool hv_isolation_type_tdx(void) +{ + return static_branch_unlikely(&isolation_type_tdx); +} diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cea95dcd27c2..2ff26f53cd62 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -169,7 +169,8 @@ enum hv_isolation_type { HV_ISOLATION_TYPE_NONE = 0, HV_ISOLATION_TYPE_VBS = 1, - HV_ISOLATION_TYPE_SNP = 2 + HV_ISOLATION_TYPE_SNP = 2, + HV_ISOLATION_TYPE_TDX = 3 }; /* Hyper-V specific model specific registers (MSRs) */ @@ -301,6 +302,13 @@ enum hv_isolation_type { #define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT #define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. + */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fa83d88e4c99..033b53f993c6 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -26,6 +26,7 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -40,6 +41,7 @@ static inline unsigned char hv_get_nmi_reason(void) #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; +extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; @@ -47,10 +49,25 @@ extern u64 hv_current_partition_id; extern union hv_ghcb * __percpu *hv_ghcb_pg; +bool hv_isolation_type_snp(void); +bool hv_isolation_type_tdx(void); +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); + +/* + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA + * to start AP in enlightened SEV guest. + */ +#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL +#define HV_AP_SEGMENT_LIMIT 0xffffffff + int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); +/* + * If the hypercall involves no input or output parameters, the hypervisor + * ignores the corresponding GPA pointer. + */ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; @@ -58,6 +75,19 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 hv_status; #ifdef CONFIG_X86_64 + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input_address, output_address); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__("mov %4, %%r8\n" + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input_address) + : "r" (output_address) + : "cc", "memory", "r8", "r9", "r10", "r11"); + return hv_status; + } + if (!hv_hypercall_pg) return U64_MAX; @@ -101,7 +131,16 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) u64 hv_status; #ifdef CONFIG_X86_64 - { + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input1, 0); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__( + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + :: "cc", "r8", "r9", "r10", "r11"); + } else { __asm__ __volatile__(CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) @@ -146,7 +185,17 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) u64 hv_status; #ifdef CONFIG_X86_64 - { + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) + return hv_tdx_hypercall(control, input1, input2); + + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { + __asm__ __volatile__("mov %4, %%r8\n" + "vmmcall" + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "r" (input2) + : "cc", "r8", "r9", "r10", "r11"); + } else { __asm__ __volatile__("mov %4, %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -225,20 +274,24 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT -void hv_ghcb_msr_write(u64 msr, u64 value); -void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -void hv_vtom_init(void); +int hv_snp_boot_ap(int cpu, unsigned long start_ip); #else -static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} -static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline void hv_vtom_init(void) {} +static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif -extern bool hv_isolation_type_snp(void); +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +void hv_ivm_msr_write(u64 msr, u64 value); +void hv_ivm_msr_read(u64 msr, u64 *value); +#else +static inline void hv_vtom_init(void) {} +static inline void hv_ivm_msr_write(u64 msr, u64 value) {} +static inline void hv_ivm_msr_read(u64 msr, u64 *value) {} +#endif static inline bool hv_is_synic_reg(unsigned int reg) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0100468e72ca..e6bba12c759c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -32,6 +32,7 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> +#include <asm/svm.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -39,6 +40,10 @@ bool hv_root_partition; bool hv_nested; struct ms_hyperv_info ms_hyperv; +/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ +bool hyperv_paravisor_present __ro_after_init; +EXPORT_SYMBOL_GPL(hyperv_paravisor_present); + #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_reg(unsigned int reg) { @@ -65,8 +70,8 @@ u64 hv_get_non_nested_register(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) - hv_ghcb_msr_read(reg, &value); + if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; @@ -75,8 +80,8 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register); void hv_set_non_nested_register(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { - hv_ghcb_msr_write(reg, value); + if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ if (hv_is_sint_reg(reg)) @@ -295,6 +300,15 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) native_smp_prepare_cpus(max_cpus); + /* + * Override wakeup_secondary_cpu_64 callback for SEV-SNP + * enlightened guest. + */ + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { + apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; + return; + } + #ifdef CONFIG_X86_64 for_each_present_cpu(i) { if (i == 0) @@ -313,6 +327,26 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) } #endif +/* + * When a fully enlightened TDX VM runs on Hyper-V, the firmware sets the + * HW_REDUCED flag: refer to acpi_tb_create_local_fadt(). Consequently ttyS0 + * interrupts can't work because request_irq() -> ... -> irq_to_desc() returns + * NULL for ttyS0. This happens because mp_config_acpi_legacy_irqs() sees a + * nr_legacy_irqs() of 0, so it doesn't initialize the array 'mp_irqs[]', and + * later setup_IO_APIC_irqs() -> find_irq_entry() fails to find the legacy irqs + * from the array and hence doesn't create the necessary irq description info. + * + * Clone arch/x86/kernel/acpi/boot.c: acpi_generic_reduced_hw_init() here, + * except don't change 'legacy_pic', which keeps its default value + * 'default_legacy_pic'. This way, mp_config_acpi_legacy_irqs() sees a non-zero + * nr_legacy_irqs() and eventually serial console interrupts works properly. + */ +static void __init reduced_hw_init(void) +{ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; @@ -399,11 +433,33 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + hyperv_paravisor_present = !!ms_hyperv.paravisor_present; + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) + + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { static_branch_enable(&isolation_type_snp); + } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { + static_branch_enable(&isolation_type_tdx); + + /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; + + if (!ms_hyperv.paravisor_present) { + /* To be supported: more work is required. */ + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + + /* HV_REGISTER_CRASH_CTL is unsupported. */ + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + + /* Don't trust Hyper-V's TLB-flushing hypercalls. */ + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + + x86_init.acpi.reduced_hw_early_init = reduced_hw_init; + } + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -473,7 +529,7 @@ static void __init ms_hyperv_init_platform(void) #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || - (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + ms_hyperv.paravisor_present) hv_vtom_init(); /* * Setup the hook to get control post apic initialization. @@ -497,7 +553,8 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; - if (hv_root_partition) + if (hv_root_partition || + (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; # endif @@ -560,6 +617,22 @@ static bool __init ms_hyperv_msi_ext_dest_id(void) return eax & HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE; } +#ifdef CONFIG_AMD_MEM_ENCRYPT +static void hv_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs) +{ + /* RAX and CPL are already in the GHCB */ + ghcb_set_rcx(ghcb, regs->cx); + ghcb_set_rdx(ghcb, regs->dx); + ghcb_set_r8(ghcb, regs->r8); +} + +static bool hv_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs) +{ + /* No checking of the return state needed */ + return true; +} +#endif + const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, @@ -567,4 +640,8 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, +#ifdef CONFIG_AMD_MEM_ENCRYPT + .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, + .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, +#endif }; |