diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 117 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt_amd.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/pat/set_memory.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 57 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 59 |
8 files changed, 121 insertions, 125 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e6c469b323cc..296d294142c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -7,7 +7,6 @@ #include <linux/sched.h> /* test_thread_flag(), ... */ #include <linux/sched/task_stack.h> /* task_stack_*(), ... */ #include <linux/kdebug.h> /* oops_begin/end, ... */ -#include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ #include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ @@ -678,7 +677,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code, ASM_CALL_ARG3, , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info)); - unreachable(); + BUG(); } #endif diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c6d29f283001..62aa4d66a032 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1080,7 +1080,8 @@ struct execmem_info __init *execmem_arch_setup(void) start = MODULES_VADDR + offset; - if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) && + cpu_feature_enabled(X86_FEATURE_PSE)) { pgprot = PAGE_KERNEL_ROX; flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; } else { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8d29163568a7..38ff7791a9c7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -593,8 +593,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr, * Examine the physical address to determine if it is EFI data. Check * it against the boot params structure and EFI tables and memory types. */ -static bool memremap_is_efi_data(resource_size_t phys_addr, - unsigned long size) +static bool memremap_is_efi_data(resource_size_t phys_addr) { u64 paddr; @@ -632,42 +631,54 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, * Examine the physical address to determine if it is boot data by checking * it against the boot params setup_data chain. */ -static bool memremap_is_setup_data(resource_size_t phys_addr, - unsigned long size) +static bool __ref __memremap_is_setup_data(resource_size_t phys_addr, bool early) { + unsigned int setup_data_sz = sizeof(struct setup_data); struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; - data = memremap(paddr, sizeof(*data), - MEMREMAP_WB | MEMREMAP_DEC); + if (early) + data = early_memremap_decrypted(paddr, setup_data_sz); + else + data = memremap(paddr, setup_data_sz, MEMREMAP_WB | MEMREMAP_DEC); if (!data) { - pr_warn("failed to memremap setup_data entry\n"); + pr_warn("failed to remap setup_data entry\n"); return false; } + size = setup_data_sz; + paddr_next = data->next; len = data->len; if ((phys_addr > paddr) && - (phys_addr < (paddr + sizeof(struct setup_data) + len))) { - memunmap(data); + (phys_addr < (paddr + setup_data_sz + len))) { + if (early) + early_memunmap(data, setup_data_sz); + else + memunmap(data); return true; } if (data->type == SETUP_INDIRECT) { - memunmap(data); - data = memremap(paddr, sizeof(*data) + len, - MEMREMAP_WB | MEMREMAP_DEC); + size += len; + if (early) { + early_memunmap(data, setup_data_sz); + data = early_memremap_decrypted(paddr, size); + } else { + memunmap(data); + data = memremap(paddr, size, MEMREMAP_WB | MEMREMAP_DEC); + } if (!data) { - pr_warn("failed to memremap indirect setup_data\n"); + pr_warn("failed to remap indirect setup_data\n"); return false; } @@ -679,7 +690,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, } } - memunmap(data); + if (early) + early_memunmap(data, size); + else + memunmap(data); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; @@ -690,67 +704,14 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return false; } -/* - * Examine the physical address to determine if it is boot data by checking - * it against the boot params setup_data chain (early boot version). - */ -static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, - unsigned long size) +static bool memremap_is_setup_data(resource_size_t phys_addr) { - struct setup_indirect *indirect; - struct setup_data *data; - u64 paddr, paddr_next; - - paddr = boot_params.hdr.setup_data; - while (paddr) { - unsigned int len, size; - - if (phys_addr == paddr) - return true; - - data = early_memremap_decrypted(paddr, sizeof(*data)); - if (!data) { - pr_warn("failed to early memremap setup_data entry\n"); - return false; - } - - size = sizeof(*data); - - paddr_next = data->next; - len = data->len; - - if ((phys_addr > paddr) && - (phys_addr < (paddr + sizeof(struct setup_data) + len))) { - early_memunmap(data, sizeof(*data)); - return true; - } - - if (data->type == SETUP_INDIRECT) { - size += len; - early_memunmap(data, sizeof(*data)); - data = early_memremap_decrypted(paddr, size); - if (!data) { - pr_warn("failed to early memremap indirect setup_data\n"); - return false; - } - - indirect = (struct setup_indirect *)data->data; - - if (indirect->type != SETUP_INDIRECT) { - paddr = indirect->addr; - len = indirect->len; - } - } - - early_memunmap(data, size); - - if ((phys_addr > paddr) && (phys_addr < (paddr + len))) - return true; - - paddr = paddr_next; - } + return __memremap_is_setup_data(phys_addr, false); +} - return false; +static bool __init early_memremap_is_setup_data(resource_size_t phys_addr) +{ + return __memremap_is_setup_data(phys_addr, true); } /* @@ -771,8 +732,8 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size, return false; if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - if (memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size)) + if (memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) return false; } @@ -797,8 +758,8 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, encrypted_prot = true; if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) { - if (early_memremap_is_setup_data(phys_addr, size) || - memremap_is_efi_data(phys_addr, size)) + if (early_memremap_is_setup_data(phys_addr) || + memremap_is_efi_data(phys_addr)) encrypted_prot = false; } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 0a120d85d7bb..95bae74fdab2 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -94,6 +94,8 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); + snp_secure_tsc_prepare(); + print_mem_encrypt_feature_info(); } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 774f9677458f..b56c5c073003 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -541,6 +541,9 @@ void __init sme_early_init(void) * kernel mapped. */ snp_update_svsm_ca(); + + if (sev_status & MSR_AMD64_SNP_SECURE_TSC) + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 95bc50a8541c..ef4514d64c05 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -32,8 +32,6 @@ #include <asm/pgalloc.h> #include <asm/proto.h> #include <asm/memtype.h> -#include <asm/hyperv-tlfs.h> -#include <asm/mshyperv.h> #include "../mm_internal.h" diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5745a354a241..1fef5ad32d5a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -19,12 +19,23 @@ EXPORT_SYMBOL(physical_mask); #endif #ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PT_RECLAIM static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) { - tlb_remove_page(tlb, table); + struct ptdesc *ptdesc = (struct ptdesc *)table; + + pagetable_dtor(ptdesc); + tlb_remove_page(tlb, ptdesc_page(ptdesc)); } -#endif +#else +static inline +void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + tlb_remove_table(tlb, table); +} +#endif /* !CONFIG_PT_RECLAIM */ +#endif /* !CONFIG_PARAVIRT */ gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; @@ -52,15 +63,13 @@ early_param("userpte", setup_userpte); void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { - pagetable_pte_dtor(page_ptdesc(pte)); paravirt_release_pte(page_to_pfn(pte)); - paravirt_tlb_remove_table(tlb, pte); + paravirt_tlb_remove_table(tlb, page_ptdesc(pte)); } #if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { - struct ptdesc *ptdesc = virt_to_ptdesc(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table @@ -69,25 +78,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - pagetable_pmd_dtor(ptdesc); - paravirt_tlb_remove_table(tlb, ptdesc_page(ptdesc)); + paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd)); } #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { - struct ptdesc *ptdesc = virt_to_ptdesc(pud); - - pagetable_pud_dtor(ptdesc); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); + paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud)); } #if CONFIG_PGTABLE_LEVELS > 4 void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d) { paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_page(p4d)); + paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d)); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -222,7 +227,7 @@ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) if (pmds[i]) { ptdesc = virt_to_ptdesc(pmds[i]); - pagetable_pmd_dtor(ptdesc); + pagetable_dtor(ptdesc); pagetable_free(ptdesc); mm_dec_nr_pmds(mm); } @@ -392,15 +397,14 @@ void __init pgtable_cache_init(void) SLAB_PANIC, NULL); } -static inline pgd_t *_pgd_alloc(void) +static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { /* * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, - PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate @@ -409,24 +413,23 @@ static inline pgd_t *_pgd_alloc(void) return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } -static inline void _pgd_free(pgd_t *pgd) +static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) { if (!SHARED_KERNEL_PMD) - free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); + __pgd_free(mm, pgd); else kmem_cache_free(pgd_cache, pgd); } #else -static inline pgd_t *_pgd_alloc(void) +static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { - return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, - PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); } -static inline void _pgd_free(pgd_t *pgd) +static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); + __pgd_free(mm, pgd); } #endif /* CONFIG_X86_PAE */ @@ -436,7 +439,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; pmd_t *pmds[MAX_PREALLOCATED_PMDS]; - pgd = _pgd_alloc(); + pgd = _pgd_alloc(mm); if (pgd == NULL) goto out; @@ -479,7 +482,7 @@ out_free_pmds: if (sizeof(pmds) != 0) free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: - _pgd_free(pgd); + _pgd_free(mm, pgd); out: return NULL; } @@ -489,7 +492,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); - _pgd_free(pgd); + _pgd_free(mm, pgd); } /* @@ -856,7 +859,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) free_page((unsigned long)pmd_sv); - pagetable_pmd_dtor(virt_to_ptdesc(pmd)); + pagetable_dtor(virt_to_ptdesc(pmd)); free_page((unsigned long)pmd); return 1; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a2becb85bea7..6cf881a942bb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -607,18 +607,15 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Stop remote flushes for the previous mm. - * Skip kernel threads; we never send init_mm TLB flushing IPIs, - * but the bitmap manipulation can cause cache line contention. + * Leave this CPU in prev's mm_cpumask. Atomic writes to + * mm_cpumask can be expensive under contention. The CPU + * will be removed lazily at TLB flush time. */ - if (prev != &init_mm) { - VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, - mm_cpumask(prev))); - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - } + VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu, + mm_cpumask(prev))); /* Start receiving IPIs and then read tlb_gen (and LAM below) */ - if (next != &init_mm) + if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); @@ -760,10 +757,13 @@ static void flush_tlb_func(void *info) if (!local) { inc_irq_stat(irq_tlb_count); count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + } - /* Can only happen on remote CPUs */ - if (f->mm && f->mm != loaded_mm) - return; + /* The CPU was left in the mm_cpumask of the target mm. Clear it. */ + if (f->mm && f->mm != loaded_mm) { + cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); + trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); + return; } if (unlikely(loaded_mm == &init_mm)) @@ -893,9 +893,36 @@ done: nr_invalidate); } -static bool tlb_is_not_lazy(int cpu, void *data) +static bool should_flush_tlb(int cpu, void *data) +{ + struct flush_tlb_info *info = data; + + /* Lazy TLB will get flushed at the next context switch. */ + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) + return false; + + /* No mm means kernel memory flush. */ + if (!info->mm) + return true; + + /* The target mm is loaded, and the CPU is not lazy. */ + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + return true; + + /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ + if (info->trim_cpumask) + return true; + + return false; +} + +static bool should_trim_cpumask(struct mm_struct *mm) { - return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); + if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { + WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); + return true; + } + return false; } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); @@ -929,7 +956,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); else - on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func, (void *)info, 1, cpumask); } @@ -980,6 +1007,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm, info->freed_tables = freed_tables; info->new_tlb_gen = new_tlb_gen; info->initiating_cpu = smp_processor_id(); + info->trim_cpumask = 0; return info; } @@ -1022,6 +1050,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, * flush_tlb_func_local() directly in this case. */ if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + info->trim_cpumask = should_trim_cpumask(mm); flush_tlb_multi(mm_cpumask(mm), info); } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); |