author     Paolo Bonzini <pbonzini@redhat.com>    2024-01-02 21:19:40 +0300
committer  Paolo Bonzini <pbonzini@redhat.com>    2024-01-02 21:19:40 +0300
commit     9cc52627c702b73c633737db40914ab8fc467de8 (patch)
tree       50dedcb98dcd3c239cddcad006fe815f62046f94 /arch
parent     731859dde86e23b0f11e700431e460ea76769584 (diff)
parent     aad86da229bc9d0390dc2c02eb0db9ab1f50d059 (diff)
download   linux-9cc52627c702b73c633737db40914ab8fc467de8.tar.xz
Merge tag 'kvm-riscv-6.8-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.8 part #1
- KVM_GET_REG_LIST improvement for vector registers
- Generate ISA extension reg_list using macros in get-reg-list selftest
- Steal-time accounting support along with selftest
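
The steal-time support added here has the guest register a 64-byte shared-memory record with the hypervisor through the new SBI STA (steal-time accounting) extension; the hypervisor bumps a sequence counter around each update so the guest can read the 64-bit steal value consistently. A minimal sketch of that guest-side read, paraphrasing the pv_time_steal_clock() logic in the arch/riscv/kernel/paravirt.c hunk further down (type and function names here are illustrative only, and the little-endian conversions of the real code are omitted):

/* Layout mirrors struct sbi_sta_struct from the diff below. */
struct sta_record {
	volatile unsigned int sequence;    /* odd while an update is in progress */
	unsigned int flags;
	volatile unsigned long long steal; /* stolen time, in nanoseconds */
	unsigned char preempted;
	unsigned char pad[47];             /* record is 64 bytes in total */
};

static unsigned long long read_steal(const struct sta_record *st)
{
	unsigned int seq;
	unsigned long long steal;

	/*
	 * Re-read if the sequence was odd (update in flight) or changed
	 * between the two reads, so the 64-bit value is consistent even
	 * when the guest cannot read it atomically.
	 */
	do {
		seq = st->sequence;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
		steal = st->steal;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
	} while ((seq & 1) || seq != st->sequence);

	return steal;
}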
Diffstat (limited to 'arch')
58 files changed, 989 insertions, 504 deletions
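
Alongside the guest-side plumbing, the merge exposes the per-vCPU steal-time state to user space through a new KVM_REG_RISCV_SBI_STATE register type (see the arch/riscv/include/uapi/asm/kvm.h hunk below), so a VMM can save and restore the shmem address across migration. An illustrative snippet of how a VMM on a 64-bit host could read the low half of that address; vcpu_fd is assumed to be an open KVM vCPU descriptor and error handling is elided:

#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>   /* struct kvm_one_reg, KVM_GET_ONE_REG, KVM_REG_RISCV */
#include <asm/kvm.h>     /* KVM_REG_RISCV_SBI_STATE, KVM_REG_RISCV_SBI_STA, ... */

static int get_sta_shmem_lo(int vcpu_fd, unsigned long *val)
{
	struct kvm_one_reg reg = {
		/* 64-bit host, so the register size is U64 */
		.id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
		      KVM_REG_RISCV_SBI_STATE |
		      KVM_REG_RISCV_SBI_STA |
		      KVM_REG_RISCV_SBI_STA_REG(shmem_lo),
		.addr = (uint64_t)(uintptr_t)val,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}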
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3162db540ee9..1b0483c51cc1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,7 +49,6 @@ config ARC select OF select OF_EARLY_FLATTREE select PCI_SYSCALL if PCI - select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select TRACE_IRQFLAGS_SUPPORT @@ -232,10 +231,6 @@ config ARC_CACHE_PAGES Note that Global I/D ENABLE + Per Page DISABLE works but corollary Global DISABLE + Per Page ENABLE won't work -config ARC_CACHE_VIPT_ALIASING - bool "Support VIPT Aliasing D$" - depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - endif #ARC_CACHE config ARC_HAS_ICCM diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index bd5b1a9a0544..563af3e75f01 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -44,31 +44,10 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ -#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING - #define flush_cache_mm(mm) /* called on munmap/exit */ #define flush_cache_range(mm, u_vstart, u_vend) #define flush_cache_page(vma, u_vaddr, pfn) /* PF handling/COW-break */ -#else /* VIPT aliasing dcache */ - -/* To clear out stale userspace mappings */ -void flush_cache_mm(struct mm_struct *mm); -void flush_cache_range(struct vm_area_struct *vma, - unsigned long start,unsigned long end); -void flush_cache_page(struct vm_area_struct *vma, - unsigned long user_addr, unsigned long page); - -/* - * To make sure that userspace mapping is flushed to memory before - * get_user_pages() uses a kernel mapping to access the page - */ -#define ARCH_HAS_FLUSH_ANON_PAGE -void flush_anon_page(struct vm_area_struct *vma, - struct page *page, unsigned long u_vaddr); - -#endif /* CONFIG_ARC_CACHE_VIPT_ALIASING */ - /* * A new pagecache page has PG_arch_1 clear - thus dcache dirty by default * This works around some PIO based drivers which don't call flush_dcache_page @@ -76,28 +55,6 @@ void flush_anon_page(struct vm_area_struct *vma, */ #define PG_dc_clean PG_arch_1 -#define CACHE_COLORS_NUM 4 -#define CACHE_COLORS_MSK (CACHE_COLORS_NUM - 1) -#define CACHE_COLOR(addr) (((unsigned long)(addr) >> (PAGE_SHIFT)) & CACHE_COLORS_MSK) - -/* - * Simple wrapper over config option - * Bootup code ensures that hardware matches kernel configuration - */ -static inline int cache_is_vipt_aliasing(void) -{ - return IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); -} - -/* - * checks if two addresses (after page aligning) index into same cache set - */ -#define addr_not_cache_congruent(addr1, addr2) \ -({ \ - cache_is_vipt_aliasing() ? 
\ - (CACHE_COLOR(addr1) != CACHE_COLOR(addr2)) : 0; \ -}) - #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h index 4d13320e0c1b..3802a2daaf86 100644 --- a/arch/arc/include/asm/entry-arcv2.h +++ b/arch/arc/include/asm/entry-arcv2.h @@ -291,4 +291,36 @@ /* M = 8-1 N = 8 */ .endm +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + #endif diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index a0e760eb35a8..92c3e9f13252 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -33,6 +33,91 @@ #include <asm/irqflags-compact.h> #include <asm/thread_info.h> /* For THREAD_SIZE */ +/* Note on the LD/ST addr modes with addr reg wback + * + * LD.a same as LD.aw + * + * LD.a reg1, [reg2, x] => Pre Incr + * Eff Addr for load = [reg2 + x] + * + * LD.ab reg1, [reg2, x] => Post Incr + * Eff Addr for load = [reg2] + */ + +.macro PUSHAX aux + lr r9, [\aux] + push r9 +.endm + +.macro POPAX aux + pop r9 + sr r9, [\aux] +.endm + +.macro SAVE_R0_TO_R12 + push r0 + push r1 + push r2 + push r3 + push r4 + push r5 + push r6 + push r7 + push r8 + push r9 + push r10 + push r11 + push r12 +.endm + +.macro RESTORE_R12_TO_R0 + pop r12 + pop r11 + pop r10 + pop r9 + pop r8 + pop r7 + pop r6 + pop r5 + pop r4 + pop r3 + pop r2 + pop r1 + pop r0 +.endm + +.macro SAVE_ABI_CALLEE_REGS + push r13 + push r14 + push r15 + push r16 + push r17 + push r18 + push r19 + push r20 + push r21 + push r22 + push r23 + push r24 + push r25 +.endm + +.macro RESTORE_ABI_CALLEE_REGS + pop r25 + pop r24 + pop r23 + pop r22 + pop r21 + pop r20 + pop r19 + pop r18 + pop r17 + pop r16 + pop r15 + pop r14 + pop r13 +.endm + /*-------------------------------------------------------------- * Switch to Kernel Mode stack if SP points to User Mode stack * @@ -235,7 +320,7 @@ SWITCH_TO_KERNEL_STK - PUSH 0x003\LVL\()abcd /* Dummy ECR */ + st.a 0x003\LVL\()abcd, [sp, -4] /* Dummy ECR */ sub sp, sp, 8 /* skip orig_r0 (not needed) skip pt_regs->sp, already saved above */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 49c2e090cb5c..cf1ba376e992 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -21,114 +21,12 @@ #include <asm/entry-arcv2.h> #endif -/* Note on the LD/ST addr modes with addr reg wback - * - * LD.a same as LD.aw - * - * LD.a reg1, [reg2, x] => Pre Incr - * Eff Addr for load = [reg2 + x] - * - * LD.ab reg1, [reg2, x] => Post Incr - * Eff Addr for load = [reg2] - */ - -.macro PUSH reg - st.a \reg, [sp, -4] -.endm - -.macro PUSHAX aux - lr r9, [\aux] - PUSH r9 -.endm - -.macro POP reg - ld.ab \reg, [sp, 4] -.endm - -.macro POPAX aux - POP r9 - sr r9, [\aux] -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore Scratch Regs: - * used by Interrupt/Exception Prologue/Epilogue - *-------------------------------------------------------------*/ -.macro SAVE_R0_TO_R12 - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 - PUSH r4 - PUSH r5 - PUSH r6 - PUSH r7 - PUSH r8 - PUSH r9 - PUSH r10 - PUSH r11 - PUSH r12 -.endm - -.macro RESTORE_R12_TO_R0 - POP r12 - POP 
r11 - POP r10 - POP r9 - POP r8 - POP r7 - POP r6 - POP r5 - POP r4 - POP r3 - POP r2 - POP r1 - POP r0 - -.endm - -/*-------------------------------------------------------------- - * Helpers to save/restore callee-saved regs: - * used by several macros below - *-------------------------------------------------------------*/ -.macro SAVE_R13_TO_R25 - PUSH r13 - PUSH r14 - PUSH r15 - PUSH r16 - PUSH r17 - PUSH r18 - PUSH r19 - PUSH r20 - PUSH r21 - PUSH r22 - PUSH r23 - PUSH r24 - PUSH r25 -.endm - -.macro RESTORE_R25_TO_R13 - POP r25 - POP r24 - POP r23 - POP r22 - POP r21 - POP r20 - POP r19 - POP r18 - POP r17 - POP r16 - POP r15 - POP r14 - POP r13 -.endm - /* * save user mode callee regs as struct callee_regs * - needed by fork/do_signal/unaligned-access-emulation. */ .macro SAVE_CALLEE_SAVED_USER - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm /* @@ -136,18 +34,18 @@ * - could have been changed by ptrace tracer or unaligned-access fixup */ .macro RESTORE_CALLEE_SAVED_USER - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /* * save/restore kernel mode callee regs at the time of context switch */ .macro SAVE_CALLEE_SAVED_KERNEL - SAVE_R13_TO_R25 + SAVE_ABI_CALLEE_REGS .endm .macro RESTORE_CALLEE_SAVED_KERNEL - RESTORE_R25_TO_R13 + RESTORE_ABI_CALLEE_REGS .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index ef8d4166370c..8a2441670a8f 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -10,6 +10,13 @@ #include <linux/types.h> #include <asm-generic/pgtable-nopmd.h> +/* + * Hugetlb definitions. + */ +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) + static inline pte_t pmd_pte(pmd_t pmd) { return __pte(pmd_val(pmd)); diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 4a2b30fb5a98..00b9318e551e 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -54,6 +54,10 @@ struct pt_regs { ecr_reg ecr; }; +struct callee_regs { + unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; +}; + #define MAX_REG_OFFSET offsetof(struct pt_regs, ecr) #else @@ -92,16 +96,14 @@ struct pt_regs { unsigned long status32; }; -#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) - -#endif - -/* Callee saved registers - need to be saved only when you are scheduled out */ - struct callee_regs { unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, status32) + +#endif + #define instruction_pointer(regs) ((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 4dcf8589b708..d08a5092c2b4 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -153,7 +153,7 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) { int n = 0; #ifdef CONFIG_ISA_ARCV2 - const char *release, *cpu_nm, *isa_nm = "ARCv2"; + const char *release = "", *cpu_nm = "HS38", *isa_nm = "ARCv2"; int dual_issue = 0, dual_enb = 0, mpy_opt, present; int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk; char mpy_nm[16], lpb_nm[32]; @@ -172,8 +172,6 @@ static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len) * releases only update it. 
*/ - cpu_nm = "HS38"; - if (info->arcver > 0x50 && info->arcver <= 0x53) { release = arc_hs_rel[info->arcver - 0x51].str; } else { diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 0b3bb529d246..8f6f4a542964 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -62,7 +62,7 @@ struct rt_sigframe { unsigned int sigret_magic; }; -static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int save_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT @@ -75,12 +75,12 @@ static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) #else v2abi.r58 = v2abi.r59 = 0; #endif - err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); + err = __copy_to_user(&mctx->v2abi, (void const *)&v2abi, sizeof(v2abi)); #endif return err; } -static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +static int restore_arcv2_regs(struct sigcontext __user *mctx, struct pt_regs *regs) { int err = 0; #ifndef CONFIG_ISA_ARCOMPACT diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f7e05c146637..9106ceac323c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -145,10 +145,9 @@ dc_chk: p_dc->sz_k = 1 << (dbcr.sz - 1); n += scnprintf(buf + n, len - n, - "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", + "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s\n", p_dc->sz_k, assoc, p_dc->line_len, vipt ? "VIPT" : "PIPT", - p_dc->colors > 1 ? " aliasing" : "", IS_USED_CFG(CONFIG_ARC_HAS_DCACHE)); slc_chk: @@ -703,51 +702,10 @@ static inline void arc_slc_enable(void) * Exported APIs */ -/* - * Handle cache congruency of kernel and userspace mappings of page when kernel - * writes-to/reads-from - * - * The idea is to defer flushing of kernel mapping after a WRITE, possible if: - * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent - * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) - * -In SMP, if hardware caches are coherent - * - * There's a corollary case, where kernel READs from a userspace mapped page. - * If the U-mapping is not congruent to K-mapping, former needs flushing. - */ void flush_dcache_folio(struct folio *folio) { - struct address_space *mapping; - - if (!cache_is_vipt_aliasing()) { - clear_bit(PG_dc_clean, &folio->flags); - return; - } - - /* don't handle anon pages here */ - mapping = folio_flush_mapping(folio); - if (!mapping) - return; - - /* - * pagecache page, file not yet mapped to userspace - * Make a note that K-mapping is dirty - */ - if (!mapping_mapped(mapping)) { - clear_bit(PG_dc_clean, &folio->flags); - } else if (folio_mapped(folio)) { - /* kernel reading from page with U-mapping */ - phys_addr_t paddr = (unsigned long)folio_address(folio); - unsigned long vaddr = folio_pos(folio); - - /* - * vaddr is not actually the virtual address, but is - * congruent to every user mapping. 
- */ - if (addr_not_cache_congruent(paddr, vaddr)) - __flush_dcache_pages(paddr, vaddr, - folio_nr_pages(folio)); - } + clear_bit(PG_dc_clean, &folio->flags); + return; } EXPORT_SYMBOL(flush_dcache_folio); @@ -921,44 +879,6 @@ noinline void flush_cache_all(void) } -#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING - -void flush_cache_mm(struct mm_struct *mm) -{ - flush_cache_all(); -} - -void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, - unsigned long pfn) -{ - phys_addr_t paddr = pfn << PAGE_SHIFT; - - u_vaddr &= PAGE_MASK; - - __flush_dcache_pages(paddr, u_vaddr, 1); - - if (vma->vm_flags & VM_EXEC) - __inv_icache_pages(paddr, u_vaddr, 1); -} - -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - flush_cache_all(); -} - -void flush_anon_page(struct vm_area_struct *vma, struct page *page, - unsigned long u_vaddr) -{ - /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1); - __flush_dcache_pages((phys_addr_t)page_address(page), - (phys_addr_t)page_address(page), 1); - -} - -#endif - void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { @@ -966,46 +886,11 @@ void copy_user_highpage(struct page *to, struct page *from, struct folio *dst = page_folio(to); void *kfrom = kmap_atomic(from); void *kto = kmap_atomic(to); - int clean_src_k_mappings = 0; - - /* - * If SRC page was already mapped in userspace AND it's U-mapping is - * not congruent with K-mapping, sync former to physical page so that - * K-mapping in memcpy below, sees the right data - * - * Note that while @u_vaddr refers to DST page's userspace vaddr, it is - * equally valid for SRC page as well - * - * For !VIPT cache, all of this gets compiled out as - * addr_not_cache_congruent() is 0 - */ - if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1); - clean_src_k_mappings = 1; - } copy_page(kto, kfrom); - /* - * Mark DST page K-mapping as dirty for a later finalization by - * update_mmu_cache(). Although the finalization could have been done - * here as well (given that both vaddr/paddr are available). - * But update_mmu_cache() already has code to do that for other - * non copied user pages (e.g. read faults which wire in pagecache page - * directly). 
- */ clear_bit(PG_dc_clean, &dst->flags); - - /* - * if SRC was already usermapped and non-congruent to kernel mapping - * sync the kernel mapping back to physical page - */ - if (clean_src_k_mappings) { - __flush_dcache_pages((unsigned long)kfrom, - (unsigned long)kfrom, 1); - } else { - clear_bit(PG_dc_clean, &src->flags); - } + clear_bit(PG_dc_clean, &src->flags); kunmap_atomic(kto); kunmap_atomic(kfrom); @@ -1140,17 +1025,8 @@ static noinline void __init arc_cache_init_master(void) dc->line_len, L1_CACHE_BYTES); /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ - if (is_isa_arcompact()) { - int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); - - if (dc->colors > 1) { - if (!handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - if (CACHE_COLORS_NUM != dc->colors) - panic("CACHE_COLORS_NUM not optimized for config\n"); - } else if (handled && dc->colors == 1) { - panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - } + if (is_isa_arcompact() && dc->colors > 1) { + panic("Aliasing VIPT cache not supported\n"); } } diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index fce5fa2b4f52..3c1c7ae73292 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -14,10 +14,6 @@ #include <asm/cacheflush.h> -#define COLOUR_ALIGN(addr, pgoff) \ - ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ - (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) - /* * Ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. @@ -31,21 +27,13 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - int do_align = 0; - int aliasing = cache_is_vipt_aliasing(); struct vm_unmapped_area_info info; /* - * We only need to do colour alignment if D cache aliases. - */ - if (aliasing) - do_align = filp || (flags & MAP_SHARED); - - /* * We enforce the MAP_FIXED case. */ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && + if (flags & MAP_SHARED && (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; @@ -55,10 +43,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return -ENOMEM; if (addr) { - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && @@ -70,7 +55,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.length = len; info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; - info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; return vm_unmapped_area(&info); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e536b2dcd4b0..ad702b49aeb3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -478,21 +478,15 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, create_tlb(vma, vaddr, ptep); - if (page == ZERO_PAGE(0)) { + if (page == ZERO_PAGE(0)) return; - } /* - * Exec page : Independent of aliasing/page-color considerations, - * since icache doesn't snoop dcache on ARC, any dirty - * K-mapping of a code page needs to be wback+inv so that - * icache fetch by userspace sees code correctly. - * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it - * so userspace sees the right data. 
- * (Avoids the flush for Non-exec + congruent mapping case) + * For executable pages, since icache doesn't snoop dcache, any + * dirty K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. */ - if ((vma->vm_flags & VM_EXEC) || - addr_not_cache_congruent(paddr, vaddr)) { + if (vma->vm_flags & VM_EXEC) { struct folio *folio = page_folio(page); int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags); if (dirty) { diff --git a/arch/arm/boot/dts/ti/omap/am33xx.dtsi b/arch/arm/boot/dts/ti/omap/am33xx.dtsi index 1a2cd5baf402..5b9e01a8aa5d 100644 --- a/arch/arm/boot/dts/ti/omap/am33xx.dtsi +++ b/arch/arm/boot/dts/ti/omap/am33xx.dtsi @@ -359,6 +359,7 @@ <SYSC_IDLE_NO>, <SYSC_IDLE_SMART>, <SYSC_IDLE_SMART_WKUP>; + ti,sysc-delay-us = <2>; clocks = <&l3s_clkctrl AM3_L3S_USB_OTG_HS_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/ti/omap/dra7.dtsi b/arch/arm/boot/dts/ti/omap/dra7.dtsi index 3f3e52e3b375..6509c742fb58 100644 --- a/arch/arm/boot/dts/ti/omap/dra7.dtsi +++ b/arch/arm/boot/dts/ti/omap/dra7.dtsi @@ -147,7 +147,7 @@ l3-noc@44000000 { compatible = "ti,dra7-l3-noc"; - reg = <0x44000000 0x1000>, + reg = <0x44000000 0x1000000>, <0x45000000 0x1000>; interrupts-extended = <&crossbar_mpu GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, <&wakeupgen GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 98999aa8cc0c..7f387706368a 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -793,11 +793,16 @@ void __init omap_soc_device_init(void) soc_dev_attr->machine = soc_name; soc_dev_attr->family = omap_get_family(); + if (!soc_dev_attr->family) { + kfree(soc_dev_attr); + return; + } soc_dev_attr->revision = soc_rev; soc_dev_attr->custom_attr_group = omap_soc_groups[0]; soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) { + kfree(soc_dev_attr->family); kfree(soc_dev_attr); return; } diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi index 15290e6892fc..fc7315b94406 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero.dtsi @@ -68,10 +68,7 @@ &emac0 { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; - allwinner,rx-delay-ps = <3100>; - allwinner,tx-delay-ps = <700>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index d83852e72f06..b5d713926a34 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -13,6 +13,9 @@ }; &emac0 { + allwinner,rx-delay-ps = <3100>; + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii"; phy-supply = <®_dcdce>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index 00fe28caac93..b3b1b8692125 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -13,6 +13,8 @@ }; &emac0 { + allwinner,tx-delay-ps = <700>; + phy-mode = "rgmii-rxid"; phy-supply = <®_dldo1>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts index 70b465f7c6a7..00ac59a873e8 100644 --- 
a/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts +++ b/arch/arm64/boot/dts/mediatek/mt8395-genio-1200-evk.dts @@ -238,6 +238,7 @@ mt6360: pmic@34 { compatible = "mediatek,mt6360"; reg = <0x34>; + interrupt-parent = <&pio>; interrupts = <128 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; interrupt-controller; diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d977713ec0ba..abb57bc54305 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -44,9 +44,6 @@ return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -81,6 +78,5 @@ } asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); -#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e5f75f1f1085..4796104c4471 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -410,7 +410,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); - + kvm_vgic_vcpu_destroy(vcpu); kvm_arm_vcpu_destroy(vcpu); } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c8c3cb812783..e949e1d0fd9f 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -368,7 +368,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) vgic_v4_teardown(kvm); } -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -379,29 +379,39 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) vgic_flush_pending_lpis(vcpu); INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_unregister_redist_iodev(vcpu); + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + } } -static void __kvm_vgic_destroy(struct kvm *kvm) +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->slots_lock); + __kvm_vgic_vcpu_destroy(vcpu); + mutex_unlock(&kvm->slots_lock); +} + +void kvm_vgic_destroy(struct kvm *kvm) { struct kvm_vcpu *vcpu; unsigned long i; - lockdep_assert_held(&kvm->arch.config_lock); + mutex_lock(&kvm->slots_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); + __kvm_vgic_vcpu_destroy(vcpu); + + mutex_lock(&kvm->arch.config_lock); kvm_vgic_dist_destroy(kvm); -} -void kvm_vgic_destroy(struct kvm *kvm) -{ - mutex_lock(&kvm->arch.config_lock); - __kvm_vgic_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); } /** @@ -469,25 +479,26 @@ int kvm_vgic_map_resources(struct kvm *kvm) type = VGIC_V3; } - if (ret) { - __kvm_vgic_destroy(kvm); + if (ret) goto out; - } + dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) { + if (ret) kvm_err("Unable to register VGIC dist MMIO regions\n"); - kvm_vgic_destroy(kvm); - } - mutex_unlock(&kvm->slots_lock); - return ret; + goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); +out_slots: mutex_unlock(&kvm->slots_lock); + + if (ret) + 
kvm_vgic_destroy(kvm); + return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 89117ba2528a..a764b0ab8bf9 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -820,7 +820,7 @@ out_unlock: return ret; } -static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) { struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; @@ -833,6 +833,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) unsigned long c; int ret = 0; + lockdep_assert_held(&kvm->slots_lock); + kvm_for_each_vcpu(c, vcpu, kvm) { ret = vgic_register_redist_iodev(vcpu); if (ret) diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0ab09b0d4440..8d134569d0a1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -241,6 +241,7 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 24c1799e2ec4..ef8b7b012a0b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -724,6 +724,25 @@ config COMPAT If you want to execute 32-bit userspace applications, say Y. +config PARAVIRT + bool "Enable paravirtualization code" + depends on RISCV_SBI + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. 
+ config RELOCATABLE bool "Build a relocatable kernel" depends on MMU && 64BIT && !XIP_KERNEL diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 6964dd235e97..484d04a92fa6 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -41,6 +41,7 @@ KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HFENCE \ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, @@ -262,6 +263,12 @@ struct kvm_vcpu_arch { /* 'static' configurations which are set only once */ struct kvm_vcpu_config cfg; + + /* SBI steal-time accounting */ + struct { + gpa_t shmem; + u64 last_steal; + } sta; }; static inline void kvm_arch_sync_events(struct kvm *kvm) {} @@ -370,4 +377,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu); + #endif /* __RISCV_KVM_HOST_H__ */ diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 6a453f7f8b56..b96705258cf9 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -15,9 +15,10 @@ #define KVM_SBI_VERSION_MINOR 0 enum kvm_riscv_sbi_ext_status { - KVM_RISCV_SBI_EXT_UNINITIALIZED, - KVM_RISCV_SBI_EXT_AVAILABLE, - KVM_RISCV_SBI_EXT_UNAVAILABLE, + KVM_RISCV_SBI_EXT_STATUS_UNINITIALIZED, + KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE, + KVM_RISCV_SBI_EXT_STATUS_ENABLED, + KVM_RISCV_SBI_EXT_STATUS_DISABLED, }; struct kvm_vcpu_sbi_context { @@ -36,7 +37,7 @@ struct kvm_vcpu_sbi_extension { unsigned long extid_start; unsigned long extid_end; - bool default_unavail; + bool default_disabled; /** * SBI extension handler. 
It can be defined for a given extension or group of @@ -59,11 +60,21 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg); const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, unsigned long extid); +bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx); int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long *reg_val); +int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num, + unsigned long reg_val); + #ifdef CONFIG_RISCV_SBI_V01 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01; #endif @@ -74,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn; +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; diff --git a/arch/riscv/include/asm/paravirt.h b/arch/riscv/include/asm/paravirt.h new file mode 100644 index 000000000000..c0abde70fc2c --- /dev/null +++ b/arch/riscv/include/asm/paravirt.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PARAVIRT_H +#define _ASM_RISCV_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +#include <linux/static_call_types.h> + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return static_call(pv_steal_clock)(cpu); +} + +int __init pv_time_init(void); + +#else + +#define pv_time_init() do {} while (0) + +#endif /* CONFIG_PARAVIRT */ +#endif /* _ASM_RISCV_PARAVIRT_H */ diff --git a/arch/riscv/include/asm/paravirt_api_clock.h b/arch/riscv/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/riscv/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 0892f4421bc4..b6f898c56940 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -31,6 +31,7 @@ enum sbi_ext_id { SBI_EXT_SRST = 0x53525354, SBI_EXT_PMU = 0x504D55, SBI_EXT_DBCN = 0x4442434E, + SBI_EXT_STA = 0x535441, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x08000000, @@ -243,6 +244,22 @@ enum sbi_ext_dbcn_fid { SBI_EXT_DBCN_CONSOLE_WRITE_BYTE = 2, }; +/* SBI STA (steal-time accounting) extension */ +enum sbi_ext_sta_fid { + SBI_EXT_STA_STEAL_TIME_SET_SHMEM = 0, +}; + +struct sbi_sta_struct { + __le32 sequence; + __le32 flags; + __le64 steal; + u8 preempted; + u8 pad[47]; +} __packed; + +#define SBI_STA_SHMEM_DISABLE -1 + +/* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 #define SBI_SPEC_VERSION_MAJOR_SHIFT 24 #define SBI_SPEC_VERSION_MAJOR_MASK 0x7f diff 
--git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 1d7942c8a6cb..eeec04b7dae6 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -46,9 +46,6 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__riscv_compat_sys_##name, sys_ni_posix_timers); - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -82,6 +79,4 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); return sys_ni_syscall(); \ } -#define SYS_NI(name) SYSCALL_ALIAS(__riscv_sys_##name, sys_ni_posix_timers); - #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 60d3b21dead7..d6b7a5b95874 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -157,9 +157,16 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_EXPERIMENTAL, KVM_RISCV_SBI_EXT_VENDOR, KVM_RISCV_SBI_EXT_DBCN, + KVM_RISCV_SBI_EXT_STA, KVM_RISCV_SBI_EXT_MAX, }; +/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_sbi_sta { + unsigned long shmem_lo; + unsigned long shmem_hi; +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -241,6 +248,12 @@ enum KVM_RISCV_SBI_EXT_ID { #define KVM_REG_RISCV_VECTOR_REG(n) \ ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long)) +/* Registers for specific SBI extensions are mapped as type 10 */ +#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA_REG(name) \ + (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long)) + /* Device Control API: RISC-V AIA */ #define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 #define KVM_DEV_RISCV_APLIC_SIZE 0x4000 diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fee22a3d1b53..807c2bde1f83 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_SMP) += sbi-ipi.o obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o +obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c new file mode 100644 index 000000000000..8e114f5930ce --- /dev/null +++ b/arch/riscv/kernel/paravirt.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. 
+ */ + +#define pr_fmt(fmt) "riscv-pv: " fmt + +#include <linux/cpuhotplug.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/percpu-defs.h> +#include <linux/printk.h> +#include <linux/static_call.h> +#include <linux/types.h> + +#include <asm/barrier.h> +#include <asm/page.h> +#include <asm/paravirt.h> +#include <asm/sbi.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); + +static bool __init has_pv_steal_clock(void) +{ + if (sbi_spec_version >= sbi_mk_version(2, 0) && + sbi_probe_extension(SBI_EXT_STA) > 0) { + pr_info("SBI STA extension detected\n"); + return true; + } + + return false; +} + +static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, + unsigned long flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + if (ret.error) { + if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + pr_warn("Failed to disable steal-time shmem"); + else + pr_warn("Failed to set steal-time shmem"); + return sbi_err_map_linux_errno(ret.error); + } + + return 0; +} + +static int pv_time_cpu_online(unsigned int cpu) +{ + struct sbi_sta_struct *st = this_cpu_ptr(&steal_time); + phys_addr_t pa = __pa(st); + unsigned long lo = (unsigned long)pa; + unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0; + + return sbi_sta_steal_time_set_shmem(lo, hi, 0); +} + +static int pv_time_cpu_down_prepare(unsigned int cpu) +{ + return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, + SBI_STA_SHMEM_DISABLE, 0); +} + +static u64 pv_time_steal_clock(int cpu) +{ + struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu); + u32 sequence; + u64 steal; + + /* + * Check the sequence field before and after reading the steal + * field. Repeat the read if it is different or odd. 
+ */ + do { + sequence = READ_ONCE(st->sequence); + virt_rmb(); + steal = READ_ONCE(st->steal); + virt_rmb(); + } while ((le32_to_cpu(sequence) & 1) || + sequence != READ_ONCE(st->sequence)); + + return le64_to_cpu(steal); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "riscv/pv_time:online", + pv_time_cpu_online, + pv_time_cpu_down_prepare); + if (ret < 0) + return ret; + + static_call_update(pv_steal_clock, pv_time_steal_clock); + + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + + pr_info("Computing paravirt steal-time\n"); + + return 0; +} diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 23641e82a9df..ba3477197789 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -12,6 +12,7 @@ #include <asm/sbi.h> #include <asm/processor.h> #include <asm/timex.h> +#include <asm/paravirt.h> unsigned long riscv_timebase __ro_after_init; EXPORT_SYMBOL_GPL(riscv_timebase); @@ -45,4 +46,6 @@ void __init time_init(void) timer_probe(); tick_setup_hrtimer_broadcast(); + + pv_time_init(); } diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 2b0b51035302..1fd76aee3b71 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -30,6 +30,7 @@ config KVM select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select PREEMPT_NOTIFIERS + select SCHED_INFO help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 4c2067fc59fc..c9646521f113 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -26,6 +26,7 @@ kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_sbi_base.o kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o +kvm-y += vcpu_sbi_sta.o kvm-y += vcpu_timer.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o kvm-y += aia.o diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index 6cf23b8adb71..e808723a85f1 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -55,6 +55,7 @@ struct imsic { /* IMSIC SW-file */ struct imsic_mrif *swfile; phys_addr_t swfile_pa; + spinlock_t swfile_extirq_lock; }; #define imsic_vs_csr_read(__c) \ @@ -613,12 +614,23 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) { struct imsic *imsic = vcpu->arch.aia_context.imsic_state; struct imsic_mrif *mrif = imsic->swfile; + unsigned long flags; + + /* + * The critical section is necessary during external interrupt + * updates to avoid the risk of losing interrupts due to potential + * interruptions between reading topei and updating pending status. 
+ */ + + spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) && imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis)) kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); else kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); + + spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); } static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear, @@ -1039,6 +1051,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) } imsic->swfile = page_to_virt(swfile_page); imsic->swfile_pa = page_to_phys(swfile_page); + spin_lock_init(&imsic->swfile_extirq_lock); /* Setup IO device */ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e087c809073c..b5ca9f2e98ac 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -83,6 +83,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.hfence_tail = 0; memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue)); + kvm_riscv_vcpu_sbi_sta_reset(vcpu); + /* Reset the guest CSRs for hotplug usecase */ if (loaded) kvm_arch_vcpu_load(vcpu, smp_processor_id()); @@ -541,6 +543,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_riscv_vcpu_aia_load(vcpu, cpu); + kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->cpu = cpu; } @@ -614,6 +618,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_HFENCE, vcpu)) kvm_riscv_hfence_process(vcpu); + + if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) + kvm_riscv_vcpu_record_steal_time(vcpu); } } @@ -757,8 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Update HVIP CSR for current CPU */ kvm_riscv_update_hvip(vcpu); - if (ret <= 0 || - kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || + if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f8c9fa0c03c5..fc34557f5356 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -485,7 +485,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num, reg_val); -break; + break; default: rc = -ENOENT; break; @@ -931,50 +931,106 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) return copy_isa_ext_reg_indices(vcpu, NULL);; } -static inline unsigned long num_sbi_ext_regs(void) +static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { - /* - * number of KVM_REG_RISCV_SBI_SINGLE + - * 2 x (number of KVM_REG_RISCV_SBI_MULTI) - */ - return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1); -} - -static int copy_sbi_ext_reg_indices(u64 __user *uindices) -{ - int n; + unsigned int n = 0; - /* copy KVM_REG_RISCV_SBI_SINGLE */ - n = KVM_RISCV_SBI_EXT_MAX; - for (int i = 0; i < n; i++) { + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { u64 size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | i; + if (!riscv_vcpu_supports_sbi_ext(vcpu, i)) + continue; + if (uindices) { if (put_user(reg, uindices)) return -EFAULT; uindices++; } + + n++; } - /* copy KVM_REG_RISCV_SBI_MULTI */ - n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1; - for (int i = 0; i < n; i++) { - u64 size = IS_ENABLED(CONFIG_32BIT) ? - KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; - u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_MULTI_EN | i; + return n; +} + +static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu) +{ + return copy_sbi_ext_reg_indices(vcpu, NULL); +} + +static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + int total = 0; + + if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) { + u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long); + + for (int i = 0; i < n; i++) { + u64 reg = KVM_REG_RISCV | size | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_STA | i; + + if (uindices) { + if (put_user(reg, uindices)) + return -EFAULT; + uindices++; + } + } + + total += n; + } + + return total; +} + +static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu) +{ + return copy_sbi_reg_indices(vcpu, NULL); +} + +static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu) +{ + if (!riscv_isa_extension_available(vcpu->arch.isa, v)) + return 0; + + /* vstart, vl, vtype, vcsr, vlenb and 32 vector regs */ + return 37; +} + +static int copy_vector_reg_indices(const struct kvm_vcpu *vcpu, + u64 __user *uindices) +{ + const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + int n = num_vector_regs(vcpu); + u64 reg, size; + int i; + + if (n == 0) + return 0; + + /* copy vstart, vl, vtype, vcsr and vlenb */ + size = IS_ENABLED(CONFIG_32BIT) ? 
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; + for (i = 0; i < 5; i++) { + reg = KVM_REG_RISCV | size | KVM_REG_RISCV_VECTOR | i; if (uindices) { if (put_user(reg, uindices)) return -EFAULT; uindices++; } + } - reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT | - KVM_REG_RISCV_SBI_MULTI_DIS | i; + /* vector_regs have a variable 'vlenb' size */ + size = __builtin_ctzl(cntx->vector.vlenb); + size <<= KVM_REG_SIZE_SHIFT; + for (i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | + KVM_REG_RISCV_VECTOR_REG(i); if (uindices) { if (put_user(reg, uindices)) @@ -983,7 +1039,7 @@ static int copy_sbi_ext_reg_indices(u64 __user *uindices) } } - return num_sbi_ext_regs(); + return n; } /* @@ -1001,8 +1057,10 @@ unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu) res += num_timer_regs(); res += num_fp_f_regs(vcpu); res += num_fp_d_regs(vcpu); + res += num_vector_regs(vcpu); res += num_isa_ext_regs(vcpu); - res += num_sbi_ext_regs(); + res += num_sbi_ext_regs(vcpu); + res += num_sbi_regs(vcpu); return res; } @@ -1045,14 +1103,25 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu, return ret; uindices += ret; + ret = copy_vector_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; + ret = copy_isa_ext_reg_indices(vcpu, uindices); if (ret < 0) return ret; uindices += ret; - ret = copy_sbi_ext_reg_indices(uindices); + ret = copy_sbi_ext_reg_indices(vcpu, uindices); if (ret < 0) return ret; + uindices += ret; + + ret = copy_sbi_reg_indices(vcpu, uindices); + if (ret < 0) + return ret; + uindices += ret; return 0; } @@ -1075,12 +1144,14 @@ int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_FP_D: return kvm_riscv_vcpu_set_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_set_reg_vector(vcpu, reg); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_set_reg_vector(vcpu, reg); + case KVM_REG_RISCV_SBI_STATE: + return kvm_riscv_vcpu_set_reg_sbi(vcpu, reg); default: break; } @@ -1106,12 +1177,14 @@ int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_FP_D: return kvm_riscv_vcpu_get_reg_fp(vcpu, reg, KVM_REG_RISCV_FP_D); + case KVM_REG_RISCV_VECTOR: + return kvm_riscv_vcpu_get_reg_vector(vcpu, reg); case KVM_REG_RISCV_ISA_EXT: return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); case KVM_REG_RISCV_SBI_EXT: return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); - case KVM_REG_RISCV_VECTOR: - return kvm_riscv_vcpu_get_reg_vector(vcpu, reg); + case KVM_REG_RISCV_SBI_STATE: + return kvm_riscv_vcpu_get_reg_sbi(vcpu, reg); default: break; } diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index a04ff98085d9..72a2ffb8dcd1 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_dbcn, }, { + .ext_idx = KVM_RISCV_SBI_EXT_STA, + .ext_ptr = &vcpu_sbi_ext_sta, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, .ext_ptr = &vcpu_sbi_ext_experimental, }, @@ -80,6 +84,34 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { }, }; +static const struct kvm_riscv_sbi_extension_entry * +riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx) +{ + const struct kvm_riscv_sbi_extension_entry *sext = NULL; + + if (idx >= KVM_RISCV_SBI_EXT_MAX) + return NULL; + + for (int i = 0; i < 
ARRAY_SIZE(sbi_ext); i++) { + if (sbi_ext[i].ext_idx == idx) { + sext = &sbi_ext[i]; + break; + } + } + + return sext; +} + +bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx) +{ + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *sext; + + sext = riscv_vcpu_get_sbi_ext(vcpu, idx); + + return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; +} + void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -140,28 +172,19 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long reg_val) { - unsigned long i; - const struct kvm_riscv_sbi_extension_entry *sext = NULL; struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; - - if (reg_num >= KVM_RISCV_SBI_EXT_MAX) - return -ENOENT; + const struct kvm_riscv_sbi_extension_entry *sext; if (reg_val != 1 && reg_val != 0) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { - if (sbi_ext[i].ext_idx == reg_num) { - sext = &sbi_ext[i]; - break; - } - } - if (!sext) + sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num); + if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE) return -ENOENT; scontext->ext_status[sext->ext_idx] = (reg_val) ? - KVM_RISCV_SBI_EXT_AVAILABLE : - KVM_RISCV_SBI_EXT_UNAVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_ENABLED : + KVM_RISCV_SBI_EXT_STATUS_DISABLED; return 0; } @@ -170,24 +193,16 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu, unsigned long reg_num, unsigned long *reg_val) { - unsigned long i; - const struct kvm_riscv_sbi_extension_entry *sext = NULL; struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; + const struct kvm_riscv_sbi_extension_entry *sext; - if (reg_num >= KVM_RISCV_SBI_EXT_MAX) - return -ENOENT; - - for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { - if (sbi_ext[i].ext_idx == reg_num) { - sext = &sbi_ext[i]; - break; - } - } - if (!sext) + sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num); + if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE) return -ENOENT; *reg_val = scontext->ext_status[sext->ext_idx] == - KVM_RISCV_SBI_EXT_AVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_ENABLED; + return 0; } @@ -310,6 +325,69 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, return 0; } +int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_STATE); + unsigned long reg_subtype, reg_val; + + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) + return -EINVAL; + + if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val); + default: + return -EINVAL; + } + + return 0; +} + +int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = + (unsigned long __user *)(unsigned long)reg->addr; + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | + KVM_REG_SIZE_MASK | + KVM_REG_RISCV_SBI_STATE); + unsigned long reg_subtype, reg_val; + int ret; + + if (KVM_REG_SIZE(reg->id) != 
sizeof(unsigned long)) + return -EINVAL; + + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, ®_val); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + + if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( struct kvm_vcpu *vcpu, unsigned long extid) { @@ -325,7 +403,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( if (ext->extid_start <= extid && ext->extid_end >= extid) { if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX || scontext->ext_status[entry->ext_idx] == - KVM_RISCV_SBI_EXT_AVAILABLE) + KVM_RISCV_SBI_EXT_STATUS_ENABLED) return ext; return NULL; @@ -413,12 +491,12 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) if (ext->probe && !ext->probe(vcpu)) { scontext->ext_status[entry->ext_idx] = - KVM_RISCV_SBI_EXT_UNAVAILABLE; + KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; continue; } - scontext->ext_status[entry->ext_idx] = ext->default_unavail ? - KVM_RISCV_SBI_EXT_UNAVAILABLE : - KVM_RISCV_SBI_EXT_AVAILABLE; + scontext->ext_status[entry->ext_idx] = ext->default_disabled ? + KVM_RISCV_SBI_EXT_STATUS_DISABLED : + KVM_RISCV_SBI_EXT_STATUS_ENABLED; } } diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 23b57c931b15..9c2ab3dfa93a 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -204,6 +204,6 @@ static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu, const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = { .extid_start = SBI_EXT_DBCN, .extid_end = SBI_EXT_DBCN, - .default_unavail = true, + .default_disabled = true, .handler = kvm_sbi_ext_dbcn_handler, }; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c new file mode 100644 index 000000000000..01f09fe8c3b0 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + */ + +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/sizes.h> + +#include <asm/bug.h> +#include <asm/current.h> +#include <asm/kvm_vcpu_sbi.h> +#include <asm/page.h> +#include <asm/sbi.h> +#include <asm/uaccess.h> + +void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu) +{ + vcpu->arch.sta.shmem = INVALID_GPA; + vcpu->arch.sta.last_steal = 0; +} + +void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) +{ + gpa_t shmem = vcpu->arch.sta.shmem; + u64 last_steal = vcpu->arch.sta.last_steal; + u32 *sequence_ptr, sequence; + u64 *steal_ptr, steal; + unsigned long hva; + gfn_t gfn; + + if (shmem == INVALID_GPA) + return; + + /* + * shmem is 64-byte aligned (see the enforcement in + * kvm_sbi_sta_steal_time_set_shmem()) and the size of sbi_sta_struct + * is 64 bytes, so we know all its offsets are in the same page. 
+ */ + gfn = shmem >> PAGE_SHIFT; + hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); + + if (WARN_ON(kvm_is_error_hva(hva))) { + vcpu->arch.sta.shmem = INVALID_GPA; + return; + } + + sequence_ptr = (u32 *)(hva + offset_in_page(shmem) + + offsetof(struct sbi_sta_struct, sequence)); + steal_ptr = (u64 *)(hva + offset_in_page(shmem) + + offsetof(struct sbi_sta_struct, steal)); + + if (WARN_ON(get_user(sequence, sequence_ptr))) + return; + + sequence = le32_to_cpu(sequence); + sequence += 1; + + if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr))) + return; + + if (!WARN_ON(get_user(steal, steal_ptr))) { + steal = le64_to_cpu(steal); + vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay); + steal += vcpu->arch.sta.last_steal - last_steal; + WARN_ON(put_user(cpu_to_le64(steal), steal_ptr)); + } + + sequence += 1; + WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)); + + kvm_vcpu_mark_page_dirty(vcpu, gfn); +} + +static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long shmem_phys_lo = cp->a0; + unsigned long shmem_phys_hi = cp->a1; + u32 flags = cp->a2; + struct sbi_sta_struct zero_sta = {0}; + unsigned long hva; + bool writable; + gpa_t shmem; + int ret; + + if (flags != 0) + return SBI_ERR_INVALID_PARAM; + + if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && + shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + vcpu->arch.sta.shmem = INVALID_GPA; + return 0; + } + + if (shmem_phys_lo & (SZ_64 - 1)) + return SBI_ERR_INVALID_PARAM; + + shmem = shmem_phys_lo; + + if (shmem_phys_hi != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + shmem |= ((gpa_t)shmem_phys_hi << 32); + else + return SBI_ERR_INVALID_ADDRESS; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) + return SBI_ERR_INVALID_ADDRESS; + + ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta)); + if (ret) + return SBI_ERR_FAILURE; + + vcpu->arch.sta.shmem = shmem; + vcpu->arch.sta.last_steal = current->sched_info.run_delay; + + return 0; +} + +static int kvm_sbi_ext_sta_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + int ret; + + switch (funcid) { + case SBI_EXT_STA_STEAL_TIME_SET_SHMEM: + ret = kvm_sbi_sta_steal_time_set_shmem(vcpu); + break; + default: + ret = SBI_ERR_NOT_SUPPORTED; + break; + } + + retdata->err_val = ret; + + return 0; +} + +static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu) +{ + return !!sched_info_on(); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = { + .extid_start = SBI_EXT_STA, + .extid_end = SBI_EXT_STA, + .handler = kvm_sbi_ext_sta_handler, + .probe = kvm_sbi_ext_sta_probe, +}; + +int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long *reg_val) +{ + switch (reg_num) { + case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): + *reg_val = (unsigned long)vcpu->arch.sta.shmem; + break; + case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): + if (IS_ENABLED(CONFIG_32BIT)) + *reg_val = upper_32_bits(vcpu->arch.sta.shmem); + else + *reg_val = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, + unsigned long reg_num, + unsigned long reg_val) +{ + switch (reg_num) { + case KVM_REG_RISCV_SBI_STA_REG(shmem_lo): + if (IS_ENABLED(CONFIG_32BIT)) { + gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); + + 
vcpu->arch.sta.shmem = reg_val; + vcpu->arch.sta.shmem |= hi << 32; + } else { + vcpu->arch.sta.shmem = reg_val; + } + break; + case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): + if (IS_ENABLED(CONFIG_32BIT)) { + gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); + + vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32); + vcpu->arch.sta.shmem |= lo; + } else if (reg_val != 0) { + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index d74df8eb4d71..0c26189aa01c 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -15,7 +15,7 @@ .altmacro .option norelax -ENTRY(__kvm_riscv_switch_to) +SYM_FUNC_START(__kvm_riscv_switch_to) /* Save Host GPRs (except A0 and T0-T6) */ REG_S ra, (KVM_ARCH_HOST_RA)(a0) REG_S sp, (KVM_ARCH_HOST_SP)(a0) @@ -45,7 +45,7 @@ ENTRY(__kvm_riscv_switch_to) REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t4, __kvm_switch_return + la t4, .Lkvm_switch_return REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ @@ -113,7 +113,7 @@ ENTRY(__kvm_riscv_switch_to) /* Back to Host */ .align 2 -__kvm_switch_return: +.Lkvm_switch_return: /* Swap Guest A0 with SSCRATCH */ csrrw a0, CSR_SSCRATCH, a0 @@ -208,9 +208,9 @@ __kvm_switch_return: /* Return to C code */ ret -ENDPROC(__kvm_riscv_switch_to) +SYM_FUNC_END(__kvm_riscv_switch_to) -ENTRY(__kvm_riscv_unpriv_trap) +SYM_CODE_START(__kvm_riscv_unpriv_trap) /* * We assume that faulting unpriv load/store instruction is * 4-byte long and blindly increment SEPC by 4. @@ -231,12 +231,10 @@ ENTRY(__kvm_riscv_unpriv_trap) csrr a1, CSR_HTINST REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0) sret -ENDPROC(__kvm_riscv_unpriv_trap) +SYM_CODE_END(__kvm_riscv_unpriv_trap) #ifdef CONFIG_FPU - .align 3 - .global __kvm_riscv_fp_f_save -__kvm_riscv_fp_f_save: +SYM_FUNC_START(__kvm_riscv_fp_f_save) csrr t2, CSR_SSTATUS li t1, SR_FS csrs CSR_SSTATUS, t1 @@ -276,10 +274,9 @@ __kvm_riscv_fp_f_save: sw t0, KVM_ARCH_FP_F_FCSR(a0) csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_f_save) - .align 3 - .global __kvm_riscv_fp_d_save -__kvm_riscv_fp_d_save: +SYM_FUNC_START(__kvm_riscv_fp_d_save) csrr t2, CSR_SSTATUS li t1, SR_FS csrs CSR_SSTATUS, t1 @@ -319,10 +316,9 @@ __kvm_riscv_fp_d_save: sw t0, KVM_ARCH_FP_D_FCSR(a0) csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_d_save) - .align 3 - .global __kvm_riscv_fp_f_restore -__kvm_riscv_fp_f_restore: +SYM_FUNC_START(__kvm_riscv_fp_f_restore) csrr t2, CSR_SSTATUS li t1, SR_FS lw t0, KVM_ARCH_FP_F_FCSR(a0) @@ -362,10 +358,9 @@ __kvm_riscv_fp_f_restore: fscsr t0 csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_f_restore) - .align 3 - .global __kvm_riscv_fp_d_restore -__kvm_riscv_fp_d_restore: +SYM_FUNC_START(__kvm_riscv_fp_d_restore) csrr t2, CSR_SSTATUS li t1, SR_FS lw t0, KVM_ARCH_FP_D_FCSR(a0) @@ -405,4 +400,5 @@ __kvm_riscv_fp_d_restore: fscsr t0 csrw CSR_SSTATUS, t2 ret +SYM_FUNC_END(__kvm_riscv_fp_d_restore) #endif diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index b339a2682f25..d92d1348045c 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -76,6 +76,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL); if (!cntx->vector.datap) return -ENOMEM; + cntx->vector.vlenb = riscv_v_vsize / 32; vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); if 
(!vcpu->arch.host_context.vector.datap) @@ -115,6 +116,9 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): *reg_addr = &cntx->vector.vcsr; break; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + *reg_addr = &cntx->vector.vlenb; + break; case KVM_REG_RISCV_VECTOR_CSR_REG(datap): default: return -ENOENT; @@ -173,6 +177,18 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu, if (!riscv_isa_extension_available(isa, v)) return -ENOENT; + if (reg_num == KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)) { + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + unsigned long reg_val; + + if (copy_from_user(®_val, uaddr, reg_size)) + return -EFAULT; + if (reg_val != cntx->vector.vlenb) + return -EINVAL; + + return 0; + } + rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, ®_addr); if (rc) return rc; diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index dd0608629310..6de44ede4e14 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -44,8 +44,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -76,7 +75,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -93,6 +91,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLUB_STATS=y # CONFIG_COMPAT_BRK is not set @@ -619,6 +618,9 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=y +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -691,7 +693,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 1b8150e50f6a..bcae47da6b7c 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -42,8 +42,7 @@ CONFIG_KEXEC_FILE=y CONFIG_KEXEC_SIG=y CONFIG_CRASH_DUMP=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -71,7 +70,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y @@ -88,6 +86,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y +CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y CONFIG_ZSMALLOC_STAT=y # CONFIG_COMPAT_BRK is not set CONFIG_MEMORY_HOTPLUG=y @@ -605,6 +604,9 @@ CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m +CONFIG_BCACHEFS_FS=m +CONFIG_BCACHEFS_QUOTA=y +CONFIG_BCACHEFS_POSIX_ACL=y CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y @@ -677,7 +679,6 @@ CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git 
a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index b831083b4edd..47028450eee1 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -9,8 +9,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_CRASH_DUMP=y -CONFIG_MARCH_ZEC12=y -CONFIG_TUNE_ZEC12=y +CONFIG_MARCH_Z13=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index b714ed0ef688..9acf48e53a87 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -79,7 +79,7 @@ static inline int test_fp_ctl(u32 fpc) #define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) #define KERNEL_VXR (KERNEL_VXR_LOW|KERNEL_VXR_HIGH) -#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_V0V7) +#define KERNEL_FPR (KERNEL_FPC|KERNEL_VXR_LOW) struct kernel_fpu; diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 9286430fe729..35c1d1b860d8 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -63,10 +63,6 @@ cond_syscall(__s390x_sys_##name); \ cond_syscall(__s390_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) - #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ long __s390_compat_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ @@ -85,15 +81,11 @@ /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ cond_syscall(__s390_compat_sys_##name) -#define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, name, ...) \ long __s390_sys##name(struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ @@ -124,9 +116,6 @@ #define COND_SYSCALL(name) \ cond_syscall(__s390x_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers) - #define __S390_SYS_STUBx(x, fullname, name, ...) #endif /* CONFIG_COMPAT */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index fd2669b1cb2d..21f9407be5d3 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -86,9 +86,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); return sys_ni_syscall(); \ } -#define __SYS_NI(abi, name) \ - SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); - #ifdef CONFIG_X86_64 #define __X64_SYS_STUB0(name) \ __SYS_STUB0(x64, sys_##name) @@ -100,13 +97,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X64_COND_SYSCALL(name) \ __COND_SYSCALL(x64, sys_##name) -#define __X64_SYS_NI(name) \ - __SYS_NI(x64, sys_##name) #else /* CONFIG_X86_64 */ #define __X64_SYS_STUB0(name) #define __X64_SYS_STUBx(x, name, ...) 
#define __X64_COND_SYSCALL(name) -#define __X64_SYS_NI(name) #endif /* CONFIG_X86_64 */ #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) @@ -120,13 +114,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, sys_##name) -#define __IA32_SYS_NI(name) \ - __SYS_NI(ia32, sys_##name) #else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #define __IA32_SYS_STUB0(name) #define __IA32_SYS_STUBx(x, name, ...) #define __IA32_COND_SYSCALL(name) -#define __IA32_SYS_NI(name) #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ #ifdef CONFIG_IA32_EMULATION @@ -135,8 +126,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the * ia32 regs in the proper order for shared or "common" syscalls. As some * syscalls may not be implemented, we need to expand COND_SYSCALL in - * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this - * case as well. + * kernel/sys_ni.c to cover this case as well. */ #define __IA32_COMPAT_SYS_STUB0(name) \ __SYS_STUB0(ia32, compat_sys_##name) @@ -148,14 +138,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __IA32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(ia32, compat_sys_##name) -#define __IA32_COMPAT_SYS_NI(name) \ - __SYS_NI(ia32, compat_sys_##name) - #else /* CONFIG_IA32_EMULATION */ #define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) #define __IA32_COMPAT_COND_SYSCALL(name) -#define __IA32_COMPAT_SYS_NI(name) #endif /* CONFIG_IA32_EMULATION */ @@ -175,13 +161,10 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_COND_SYSCALL(name) \ __COND_SYSCALL(x64, compat_sys_##name) -#define __X32_COMPAT_SYS_NI(name) \ - __SYS_NI(x64, compat_sys_##name) #else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) -#define __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_X86_X32_ABI */ @@ -212,17 +195,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); /* * As some compat syscalls may not be implemented, we need to expand - * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in - * kernel/time/posix-stubs.c to cover this case as well. + * COND_SYSCALL_COMPAT in kernel/sys_ni.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ __IA32_COMPAT_COND_SYSCALL(name) \ __X32_COMPAT_COND_SYSCALL(name) -#define COMPAT_SYS_NI(name) \ - __IA32_COMPAT_SYS_NI(name) \ - __X32_COMPAT_SYS_NI(name) - #endif /* CONFIG_COMPAT */ #define __SYSCALL_DEFINEx(x, name, ...) \ @@ -243,8 +221,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not * hurt, we only need to re-define it here to keep the naming congruent to - * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() - * macros to work correctly. + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() macro + * to work correctly. 
*/ #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ @@ -257,10 +235,6 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); __X64_COND_SYSCALL(name) \ __IA32_COND_SYSCALL(name) -#define SYS_NI(name) \ - __X64_SYS_NI(name) \ - __IA32_SYS_NI(name) - /* * For VSYSCALLS, we need to declare these three syscalls with the new diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1a0dd80d81ac..85a3ce2a3666 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -293,6 +293,7 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); + has_lapic_cpus = true; return 0; } @@ -1134,7 +1135,6 @@ static int __init acpi_parse_madt_lapic_entries(void) if (!count) { count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, MAX_LOCAL_APIC); - has_lapic_cpus = count > 0; x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, acpi_parse_x2apic, MAX_LOCAL_APIC); } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f0..aae7456ece07 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -255,6 +255,16 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) } } +static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len) +{ + unsigned long flags; + + local_irq_save(flags); + optimize_nops(instr, len); + sync_core(); + local_irq_restore(flags); +} + /* * In this context, "source" is where the instructions are placed in the * section .altinstr_replacement, for example during kernel build by the @@ -438,7 +448,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. */ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - optimize_nops(instr, a->instrlen); + optimize_nops_inplace(instr, a->instrlen); continue; } @@ -1685,8 +1695,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode, } else { local_irq_save(flags); memcpy(addr, opcode, len); - local_irq_restore(flags); sync_core(); + local_irq_restore(flags); /* * Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 086a2c3aaaa0..0f8103240fda 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -255,6 +255,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) testl $X2APIC_ENABLE, %eax jnz .Lread_apicid_msr +#ifdef CONFIG_X86_X2APIC + /* + * If system is in X2APIC mode then MMIO base might not be + * mapped causing the MMIO read below to fault. Faults can't + * be handled at that point. + */ + cmpl $0, x2apic_mode(%rip) + jz .Lread_apicid_mmio + + /* Force the AP into X2APIC mode. */ + orl $X2APIC_ENABLE, %eax + wrmsr + jmp .Lread_apicid_msr +#endif + +.Lread_apicid_mmio: /* Read the APIC ID from the fix-mapped MMIO space. */ movq apic_mmio_base(%rip), %rcx addq $APIC_ID, %rcx diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 4900c078045a..6ee925d66648 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } + + /* + * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if + * the host/guest supports its use. 
+ * + * guest_can_use() checks a number of requirements on the host/guest to + * ensure that MSR_IA32_XSS is available, but it might report true even + * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host + * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better + * to further check that the guest CPUID actually supports + * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved + * guests will still get intercepted and caught in the normal + * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. + */ + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); + else + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); } void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f3bb30b40876..a8bd4e909a1e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -103,6 +103,7 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_IA32_XSS, .always = false }, { .index = MSR_EFER, .always = false }, { .index = MSR_IA32_CR_PAT, .always = false }, { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index be67ab7fdd10..c409f934c377 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -30,7 +30,7 @@ #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 -#define MAX_DIRECT_ACCESS_MSRS 46 +#define MAX_DIRECT_ACCESS_MSRS 47 #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9b1ec5d8c99c..a65fc2ae15b4 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -9,6 +9,7 @@ config XEN select PARAVIRT_CLOCK select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) + depends on X86_64 || (X86_GENERIC || MPENTIUM4 || MCORE2 || MATOM || MK8) depends on X86_LOCAL_APIC && X86_TSC help This is the Linux Xen port. Enabling this will allow the |
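Editorial note on the new SBI STA interface introduced above in arch/riscv/kvm/vcpu_sbi_sta.c: kvm_sbi_sta_steal_time_set_shmem() takes the shared-memory physical address split across a0 (low) and a1 (high), requires a2 (flags) to be zero, and rejects buffers that are not 64-byte aligned. The sketch below is only an illustration of that call convention from the guest side, not the guest implementation merged elsewhere; it assumes the kernel's generic SBI helpers sbi_ecall() and sbi_err_map_linux_errno(), and the SBI_EXT_STA / SBI_EXT_STA_STEAL_TIME_SET_SHMEM identifiers added by this series, with a hypothetical caller supplying the buffer address.

    #include <linux/kernel.h>   /* lower_32_bits()/upper_32_bits() */
    #include <linux/sizes.h>    /* SZ_64 */
    #include <linux/types.h>    /* phys_addr_t */
    #include <asm/sbi.h>        /* sbi_ecall(), struct sbiret, SBI_EXT_STA_* */

    /* Illustrative only: register a 64-byte-aligned steal-time buffer with the hypervisor. */
    static int sta_set_shmem(phys_addr_t shmem_pa)
    {
    	struct sbiret ret;

    	/* KVM returns SBI_ERR_INVALID_PARAM for unaligned buffers or nonzero flags. */
    	if (shmem_pa & (SZ_64 - 1))
    		return -EINVAL;

    	ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
    			lower_32_bits(shmem_pa),	/* a0: shmem_phys_lo */
    			upper_32_bits(shmem_pa),	/* a1: shmem_phys_hi */
    			0, 0, 0, 0);			/* a2: flags, must be 0 */

    	return ret.error ? sbi_err_map_linux_errno(ret.error) : 0;
    }

Passing SBI_STA_SHMEM_DISABLE in both a0 and a1 would instead disable steal-time reporting, which the handler above maps to vcpu->arch.sta.shmem = INVALID_GPA.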