Diffstat (limited to 'arch/x86/mm/pat')
-rw-r--r--  arch/x86/mm/pat/cpa-test.c          |   2
-rw-r--r--  arch/x86/mm/pat/memtype.c           | 226
-rw-r--r--  arch/x86/mm/pat/memtype_interval.c  |  63
-rw-r--r--  arch/x86/mm/pat/set_memory.c        | 238
4 files changed, 263 insertions, 266 deletions
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 3d2f7f0a6ed1..ad3c1feec990 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -183,7 +183,7 @@ static int pageattr_test(void)
 			break;
 
 		case 1:
-			err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
+			err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
 			break;
 
 		case 2:
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index feb8cc6a12bf..2e7923844afe 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -38,11 +38,13 @@
 #include <linux/kernel.h>
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/fs.h>
 #include <linux/rbtree.h>
 
+#include <asm/cpu_device_id.h>
 #include <asm/cacheflush.h>
 #include <asm/cacheinfo.h>
 #include <asm/processor.h>
@@ -231,7 +233,7 @@ void pat_cpu_init(void)
 		panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
 	}
 
-	wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+	wrmsrq(MSR_IA32_CR_PAT, pat_msr_val);
 
 	__flush_tlb_all();
 }
@@ -255,7 +257,7 @@ void __init pat_bp_init(void)
 	if (!cpu_feature_enabled(X86_FEATURE_PAT))
 		pat_disable("PAT not supported by the CPU.");
 	else
-		rdmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+		rdmsrq(MSR_IA32_CR_PAT, pat_msr_val);
 
 	if (!pat_msr_val) {
 		pat_disable("PAT support disabled by the firmware.");
@@ -290,9 +292,8 @@ void __init pat_bp_init(void)
 		return;
 	}
 
-	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
-	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
-	     ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+	if ((c->x86_vfm >= INTEL_PENTIUM_PRO && c->x86_vfm <= INTEL_PENTIUM_M_DOTHAN) ||
+	    (c->x86_vfm >= INTEL_P4_WILLAMETTE && c->x86_vfm <= INTEL_P4_CEDARMILL)) {
 		/*
 		 * PAT support with the lower four entries. Intel Pentium 2,
 		 * 3, M, and 4 are affected by PAT errata, which makes the
@@ -682,6 +683,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
 /**
  * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
  * of @pfn cannot be overridden by UC MTRR memory type.
+ * @pfn: The page frame number to check.
  *
  * Only to be called when PAT is enabled.
  *
@@ -773,46 +775,27 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 	return vma_prot;
 }
 
-#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+static inline void pgprot_set_cachemode(pgprot_t *prot, enum page_cache_mode pcm)
 {
-	return 1;
-}
-#else
-/* This check is needed to avoid cache aliasing when PAT is enabled */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
-	u64 from = ((u64)pfn) << PAGE_SHIFT;
-	u64 to = from + size;
-	u64 cursor = from;
-
-	if (!pat_enabled())
-		return 1;
-
-	while (cursor < to) {
-		if (!devmem_is_allowed(pfn))
-			return 0;
-		cursor += PAGE_SIZE;
-		pfn++;
-	}
-	return 1;
+	*prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) |
+			 cachemode2protval(pcm));
 }
-#endif /* CONFIG_STRICT_DEVMEM */
 
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
 {
 	enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
 
+	if (!pat_enabled())
+		return 1;
+
 	if (!range_is_allowed(pfn, size))
 		return 0;
 
 	if (file->f_flags & O_DSYNC)
 		pcm = _PAGE_CACHE_MODE_UC_MINUS;
 
-	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
-			     cachemode2protval(pcm));
+	pgprot_set_cachemode(vma_prot, pcm);
 	return 1;
 }
 
@@ -853,8 +836,7 @@ int memtype_kernel_map_sync(u64 base, unsigned long size,
  * Reserved non RAM regions only and after successful memtype_reserve,
  * this func also keeps identity mapping (if any) in sync with this new prot.
  */
-static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
-				int strict_prot)
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot)
 {
 	int is_ram = 0;
 	int ret;
@@ -880,9 +862,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 				(unsigned long long)paddr,
 				(unsigned long long)(paddr + size - 1),
 				cattr_name(pcm));
-			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
-					     (~_PAGE_CACHE_MASK)) |
-					     cachemode2protval(pcm));
+			pgprot_set_cachemode(vma_prot, pcm);
 		}
 		return 0;
 	}
@@ -892,8 +872,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 		return ret;
 
 	if (pcm != want_pcm) {
-		if (strict_prot ||
-		    !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
+		if (!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
 			memtype_free(paddr, paddr + size);
 			pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
 				current->comm, current->pid,
@@ -903,13 +882,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 				cattr_name(pcm));
 			return -EINVAL;
 		}
-		/*
-		 * We allow returning different type than the one requested in
-		 * non strict case.
-		 */
-		*vma_prot = __pgprot((pgprot_val(*vma_prot) &
-				     (~_PAGE_CACHE_MASK)) |
-				     cachemode2protval(pcm));
+		pgprot_set_cachemode(vma_prot, pcm);
 	}
 
 	if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
@@ -932,111 +905,14 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 	memtype_free(paddr, paddr + size);
 }
 
-static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
-		resource_size_t *phys)
-{
-	struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
-
-	if (follow_pfnmap_start(&args))
-		return -EINVAL;
-
-	/* Never return PFNs of anon folios in COW mappings. */
-	if (!args.special) {
-		follow_pfnmap_end(&args);
-		return -EINVAL;
-	}
-
-	*prot = pgprot_val(args.pgprot);
-	*phys = (resource_size_t)args.pfn << PAGE_SHIFT;
-	follow_pfnmap_end(&args);
-	return 0;
-}
-
-static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
-		pgprot_t *pgprot)
-{
-	unsigned long prot;
-
-	VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
-
-	/*
-	 * We need the starting PFN and cachemode used for track_pfn_remap()
-	 * that covered the whole VMA. For most mappings, we can obtain that
-	 * information from the page tables. For COW mappings, we might now
-	 * suddenly have anon folios mapped and follow_phys() will fail.
-	 *
-	 * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
-	 * detect the PFN. If we need the cachemode as well, we're out of luck
-	 * for now and have to fail fork().
-	 */
-	if (!follow_phys(vma, &prot, paddr)) {
-		if (pgprot)
-			*pgprot = __pgprot(prot);
-		return 0;
-	}
-	if (is_cow_mapping(vma->vm_flags)) {
-		if (pgprot)
-			return -EINVAL;
-		*paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		return 0;
-	}
-	WARN_ON_ONCE(1);
-	return -EINVAL;
-}
-
-/*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- *
- * If the vma has a linear pfn mapping for the entire range, we get the prot
- * from pte and reserve the entire vma range with single reserve_pfn_range call.
- */
-int track_pfn_copy(struct vm_area_struct *vma)
-{
-	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
-	pgprot_t pgprot;
-
-	if (vma->vm_flags & VM_PAT) {
-		if (get_pat_info(vma, &paddr, &pgprot))
-			return -EINVAL;
-		/* reserve the whole chunk covered by vma. */
-		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
-	}
-
-	return 0;
-}
-
-/*
- * prot is passed in as a parameter for the new mapping. If the vma has
- * a linear pfn mapping for the entire range, or no vma is provided,
- * reserve the entire pfn + size range with single reserve_pfn_range
- * call.
- */
-int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-		unsigned long pfn, unsigned long addr, unsigned long size)
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	enum page_cache_mode pcm;
 
-	/* reserve the whole chunk starting from paddr */
-	if (!vma || (addr == vma->vm_start
-				&& size == (vma->vm_end - vma->vm_start))) {
-		int ret;
-
-		ret = reserve_pfn_range(paddr, size, prot, 0);
-		if (ret == 0 && vma)
-			vm_flags_set(vma, VM_PAT);
-		return ret;
-	}
-
 	if (!pat_enabled())
 		return 0;
 
-	/*
-	 * For anything smaller than the vma size we set prot based on the
-	 * lookup.
-	 */
 	pcm = lookup_memtype(paddr);
 
 	/* Check memtype for the remaining pages */
@@ -1047,79 +923,35 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 			return -EINVAL;
 	}
 
-	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
-			 cachemode2protval(pcm));
-
+	pgprot_set_cachemode(prot, pcm);
 	return 0;
 }
 
-void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
 {
-	enum page_cache_mode pcm;
-
-	if (!pat_enabled())
-		return;
+	const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 
-	/* Set prot based on lookup */
-	pcm = lookup_memtype(pfn_t_to_phys(pfn));
-	*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
-			 cachemode2protval(pcm));
+	return reserve_pfn_range(paddr, size, prot);
 }
 
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
- */
-void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
-		 unsigned long size, bool mm_wr_locked)
+void pfnmap_untrack(unsigned long pfn, unsigned long size)
 {
-	resource_size_t paddr;
+	const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 
-	if (vma && !(vma->vm_flags & VM_PAT))
-		return;
-
-	/* free the chunk starting from pfn or the whole chunk */
-	paddr = (resource_size_t)pfn << PAGE_SHIFT;
-	if (!paddr && !size) {
-		if (get_pat_info(vma, &paddr, NULL))
-			return;
-		size = vma->vm_end - vma->vm_start;
-	}
 	free_pfn_range(paddr, size);
-	if (vma) {
-		if (mm_wr_locked)
-			vm_flags_clear(vma, VM_PAT);
-		else
-			__vm_flags_mod(vma, 0, VM_PAT);
-	}
-}
-
-/*
- * untrack_pfn_clear is called if the following situation fits:
- *
- * 1) while mremapping a pfnmap for a new region, with the old vma after
- *    its pfnmap page table has been removed. The new vma has a new pfnmap
- *    to the same pfn & cache type with VM_PAT set.
- * 2) while duplicating vm area, the new vma fails to copy the pgtable from
- *    old vma.
- */
-void untrack_pfn_clear(struct vm_area_struct *vma)
-{
-	vm_flags_clear(vma, VM_PAT);
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
 {
-	return __pgprot(pgprot_val(prot) |
-			cachemode2protval(_PAGE_CACHE_MODE_WC));
+	pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WC);
+	return prot;
 }
 EXPORT_SYMBOL_GPL(pgprot_writecombine);
 
 pgprot_t pgprot_writethrough(pgprot_t prot)
 {
-	return __pgprot(pgprot_val(prot) |
-			cachemode2protval(_PAGE_CACHE_MODE_WT));
+	pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WT);
+	return prot;
 }
 EXPORT_SYMBOL_GPL(pgprot_writethrough);
 
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
index 645613d59942..e5844ed1311e 100644
--- a/arch/x86/mm/pat/memtype_interval.c
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -49,32 +49,6 @@ INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
 
 static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
 
-enum {
-	MEMTYPE_EXACT_MATCH	= 0,
-	MEMTYPE_END_MATCH	= 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
-	struct memtype *entry_match;
-
-	entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
-
-	while (entry_match != NULL && entry_match->start < end) {
-		if ((match_type == MEMTYPE_EXACT_MATCH) &&
-		    (entry_match->start == start) && (entry_match->end == end))
-			return entry_match;
-
-		if ((match_type == MEMTYPE_END_MATCH) &&
-		    (entry_match->start < start) && (entry_match->end == end))
-			return entry_match;
-
-		entry_match = interval_iter_next(entry_match, start, end-1);
-	}
-
-	return NULL; /* Returns NULL if there is no match */
-}
-
 static int memtype_check_conflict(u64 start, u64 end,
 				  enum page_cache_mode reqtype,
 				  enum page_cache_mode *newtype)
@@ -130,35 +104,16 @@ int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_ty
 
 struct memtype *memtype_erase(u64 start, u64 end)
 {
-	struct memtype *entry_old;
-
-	/*
-	 * Since the memtype_rbroot tree allows overlapping ranges,
-	 * memtype_erase() checks with EXACT_MATCH first, i.e. free
-	 * a whole node for the munmap case. If no such entry is found,
-	 * it then checks with END_MATCH, i.e. shrink the size of a node
-	 * from the end for the mremap case.
-	 */
-	entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
-	if (!entry_old) {
-		entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
-		if (!entry_old)
-			return ERR_PTR(-EINVAL);
+	struct memtype *entry = interval_iter_first(&memtype_rbroot, start, end - 1);
+
+	while (entry && entry->start < end) {
+		if (entry->start == start && entry->end == end) {
+			interval_remove(entry, &memtype_rbroot);
+			return entry;
+		}
+		entry = interval_iter_next(entry, start, end - 1);
 	}
-
-	if (entry_old->start == start) {
-		/* munmap: erase this node */
-		interval_remove(entry_old, &memtype_rbroot);
-	} else {
-		/* mremap: update the end value of this node */
-		interval_remove(entry_old, &memtype_rbroot);
-		entry_old->end = start;
-		interval_insert(entry_old, &memtype_rbroot);
-
-		return NULL;
-	}
-
-	return entry_old;
+	return ERR_PTR(-EINVAL);
 }
 
 struct memtype *memtype_lookup(u64 addr)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 95bc50a8541c..8834c76f91c9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -32,8 +32,6 @@
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/memtype.h>
-#include <asm/hyperv-tlfs.h>
-#include <asm/mshyperv.h>
 
 #include "../mm_internal.h"
 
@@ -75,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock);
 #define CPA_ARRAY		2
 #define CPA_PAGES_ARRAY		4
 #define CPA_NO_CHECK_ALIAS	8	/* Do not search for aliases */
+#define CPA_COLLAPSE		16	/* try to collapse large pages */
 
 static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
 {
@@ -107,6 +106,18 @@ static void split_page_count(int level)
 	direct_pages_count[level - 1] += PTRS_PER_PTE;
 }
 
+static void collapse_page_count(int level)
+{
+	direct_pages_count[level]++;
+	if (system_state == SYSTEM_RUNNING) {
+		if (level == PG_LEVEL_2M)
+			count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);
+		else if (level == PG_LEVEL_1G)
+			count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);
+	}
+	direct_pages_count[level - 1] -= PTRS_PER_PTE;
+}
+
 void arch_report_meminfo(struct seq_file *m)
 {
 	seq_printf(m, "DirectMap4k:    %8lu kB\n",
@@ -124,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m)
 }
 #else
 static inline void split_page_count(int level) { }
+static inline void collapse_page_count(int level) { }
 #endif
 
 #ifdef CONFIG_X86_CPA_STATISTICS
@@ -213,14 +225,14 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 	return addr >= start && addr < end;
 }
 
+#ifdef CONFIG_X86_64
+
 static inline int
 within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
 {
 	return addr >= start && addr <= end;
 }
 
-#ifdef CONFIG_X86_64
-
 /*
  * The kernel image is mapped into two places in the virtual address space
  * (addresses without KASLR, of course):
@@ -396,16 +408,49 @@ static void __cpa_flush_tlb(void *data)
 		flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
 }
 
-static void cpa_flush(struct cpa_data *data, int cache)
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables);
+
+static void cpa_collapse_large_pages(struct cpa_data *cpa)
+{
+	unsigned long start, addr, end;
+	struct ptdesc *ptdesc, *tmp;
+	LIST_HEAD(pgtables);
+	int collapsed = 0;
+	int i;
+
+	if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+		for (i = 0; i < cpa->numpages; i++)
+			collapsed += collapse_large_pages(__cpa_addr(cpa, i),
+							  &pgtables);
+	} else {
+		addr = __cpa_addr(cpa, 0);
+		start = addr & PMD_MASK;
+		end = addr + PAGE_SIZE * cpa->numpages;
+
+		for (addr = start; within(addr, start, end); addr += PMD_SIZE)
+			collapsed += collapse_large_pages(addr, &pgtables);
+	}
+
+	if (!collapsed)
+		return;
+
+	flush_tlb_all();
+
+	list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) {
+		list_del(&ptdesc->pt_list);
+		__free_page(ptdesc_page(ptdesc));
+	}
+}
+
+static void cpa_flush(struct cpa_data *cpa, int cache)
 {
-	struct cpa_data *cpa = data;
 	unsigned int i;
 
 	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
 	if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		cpa_flush_all(cache);
-		return;
+		goto collapse_large_pages;
 	}
 
 	if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
@@ -414,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
 	on_each_cpu(__cpa_flush_tlb, cpa, 1);
 
 	if (!cache)
-		return;
+		goto collapse_large_pages;
 
 	mb();
 	for (i = 0; i < cpa->numpages; i++) {
@@ -430,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
 			clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
 	}
 	mb();
+
+collapse_large_pages:
+	if (cpa->flags & CPA_COLLAPSE)
+		cpa_collapse_large_pages(cpa);
 }
 
 static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@@ -840,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	/* change init_mm */
 	set_pte_atomic(kpte, pte);
 #ifdef CONFIG_X86_32
-	if (!SHARED_KERNEL_PMD) {
+	{
 		struct page *page;
 
 		list_for_each_entry(page, &pgd_list, lru) {
@@ -1199,6 +1248,164 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
 	return 0;
 }
 
+static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
+			     struct list_head *pgtables)
+{
+	pmd_t _pmd, old_pmd;
+	pte_t *pte, first;
+	unsigned long pfn;
+	pgprot_t pgprot;
+	int i = 0;
+
+	if (!cpu_feature_enabled(X86_FEATURE_PSE))
+		return 0;
+
+	addr &= PMD_MASK;
+	pte = pte_offset_kernel(pmd, addr);
+	first = *pte;
+	pfn = pte_pfn(first);
+
+	/* Make sure alignment is suitable */
+	if (PFN_PHYS(pfn) & ~PMD_MASK)
+		return 0;
+
+	/* The page is 4k intentionally */
+	if (pte_flags(first) & _PAGE_KERNEL_4K)
+		return 0;
+
+	/* Check that the rest of PTEs are compatible with the first one */
+	for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) {
+		pte_t entry = *pte;
+
+		if (!pte_present(entry))
+			return 0;
+		if (pte_flags(entry) != pte_flags(first))
+			return 0;
+		if (pte_pfn(entry) != pte_pfn(first) + i)
+			return 0;
+	}
+
+	old_pmd = *pmd;
+
+	/* Success: set up a large page */
+	pgprot = pgprot_4k_2_large(pte_pgprot(first));
+	pgprot_val(pgprot) |= _PAGE_PSE;
+	_pmd = pfn_pmd(pfn, pgprot);
+	set_pmd(pmd, _pmd);
+
+	/* Queue the page table to be freed after TLB flush */
+	list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
+
+	if (IS_ENABLED(CONFIG_X86_32)) {
+		struct page *page;
+
+		/* Update all PGD tables to use the same large page */
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+			p4d_t *p4d = p4d_offset(pgd, addr);
+			pud_t *pud = pud_offset(p4d, addr);
+			pmd_t *pmd = pmd_offset(pud, addr);
+			/* Something is wrong if entries doesn't match */
+			if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd)))
+				continue;
+			set_pmd(pmd, _pmd);
+		}
+	}
+
+	if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+		collapse_page_count(PG_LEVEL_2M);
+
+	return 1;
+}
+
+static int collapse_pud_page(pud_t *pud, unsigned long addr,
+			     struct list_head *pgtables)
+{
+	unsigned long pfn;
+	pmd_t *pmd, first;
+	int i;
+
+	if (!direct_gbpages)
+		return 0;
+
+	addr &= PUD_MASK;
+	pmd = pmd_offset(pud, addr);
+	first = *pmd;
+
+	/*
+	 * To restore PUD page all PMD entries must be large and
+	 * have suitable alignment
+	 */
+	pfn = pmd_pfn(first);
+	if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK))
+		return 0;
+
+	/*
+	 * To restore PUD page, all following PMDs must be compatible with the
+	 * first one.
+	 */
+	for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) {
+		pmd_t entry = *pmd;
+
+		if (!pmd_present(entry) || !pmd_leaf(entry))
+			return 0;
+		if (pmd_flags(entry) != pmd_flags(first))
+			return 0;
+		if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE)
+			return 0;
+	}
+
+	/* Restore PUD page and queue page table to be freed after TLB flush */
+	list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables);
+	set_pud(pud, pfn_pud(pfn, pmd_pgprot(first)));
+
+	if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+		collapse_page_count(PG_LEVEL_1G);
+
+	return 1;
+}
+
+/*
+ * Collapse PMD and PUD pages in the kernel mapping around the address where
+ * possible.
+ *
+ * Caller must flush TLB and free page tables queued on the list before
+ * touching the new entries. CPU must not see TLB entries of different size
+ * with different attributes.
+ */
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables)
+{
+	int collapsed = 0;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	addr &= PMD_MASK;
+
+	spin_lock(&pgd_lock);
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		goto out;
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		goto out;
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud) || pud_leaf(*pud))
+		goto out;
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd) || pmd_leaf(*pmd))
+		goto out;
+
+	collapsed = collapse_pmd_page(pmd, addr, pgtables);
+	if (collapsed)
+		collapsed += collapse_pud_page(pud, addr, pgtables);
+
+out:
+	spin_unlock(&pgd_lock);
+	return collapsed;
+}
+
 static bool try_to_free_pte_page(pte_t *pte)
 {
 	int i;
@@ -2083,6 +2290,7 @@ int set_mce_nospec(unsigned long pfn)
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
 	return rc;
 }
+EXPORT_SYMBOL_GPL(set_mce_nospec);
 
 /* Restore full speculative operation to the pfn. */
 int clear_mce_nospec(unsigned long pfn)
@@ -2122,7 +2330,8 @@ int set_memory_rox(unsigned long addr, int numpages)
 	if (__supported_pte_mask & _PAGE_NX)
 		clr.pgprot |= _PAGE_NX;
 
-	return change_page_attr_clear(&addr, numpages, clr, 0);
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0,
+					CPA_COLLAPSE, NULL);
 }
 
 int set_memory_rw(unsigned long addr, int numpages)
@@ -2149,7 +2358,8 @@ int set_memory_p(unsigned long addr, int numpages)
 
 int set_memory_4k(unsigned long addr, int numpages)
 {
-	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+	return change_page_attr_set_clr(&addr, numpages,
+					__pgprot(_PAGE_KERNEL_4K),
 					__pgprot(0), 1, 0, NULL);
 }
 
@@ -2422,7 +2632,7 @@ static int __set_pages_np(struct page *page, int numpages)
 				.pgd = NULL,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
-				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
 				.flags = CPA_NO_CHECK_ALIAS };
 
 	/*
@@ -2509,7 +2719,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 		.pgd = pgd,
 		.numpages = numpages,
 		.mask_set = __pgprot(0),
-		.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
+		.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW|_PAGE_DIRTY)),
 		.flags = CPA_NO_CHECK_ALIAS,
 	};
 
@@ -2552,7 +2762,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
 		.pgd = pgd,
 		.numpages = numpages,
 		.mask_set = __pgprot(0),
-		.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+		.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
 		.flags = CPA_NO_CHECK_ALIAS,
 	};