Diffstat (limited to 'arch/x86/mm/pat')
-rw-r--r--   arch/x86/mm/pat/cpa-test.c           |   2
-rw-r--r--   arch/x86/mm/pat/memtype.c            | 226
-rw-r--r--   arch/x86/mm/pat/memtype_interval.c   |  63
-rw-r--r--   arch/x86/mm/pat/set_memory.c         | 238
4 files changed, 263 insertions(+), 266 deletions(-)
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 3d2f7f0a6ed1..ad3c1feec990 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -183,7 +183,7 @@ static int pageattr_test(void)
break;
case 1:
- err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
+ err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
break;
case 2:
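The cpa-test fix above replaces a fixed array index (len[1]) with the per-iteration length len[i]. A simplified sketch of the loop shape involved; the NTEST bound, the addr[]/len[] arrays and the i % 3 dispatch are assumptions for illustration, not the upstream test verbatim:

for (i = 0; i < NTEST; i++) {
        switch (i % 3) {
        case 0:
                /* single-address variant: uses the per-iteration length */
                err = change_page_attr_set(&addr[i], len[i], PAGE_CPA_TEST, 0);
                break;
        case 1:
                /* array variant: must also use len[i], not the fixed len[1] */
                err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
                break;
        }
}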
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index feb8cc6a12bf..2e7923844afe 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -38,11 +38,13 @@
#include <linux/kernel.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
+#include <asm/cpu_device_id.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/processor.h>
@@ -231,7 +233,7 @@ void pat_cpu_init(void)
panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
- wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+ wrmsrq(MSR_IA32_CR_PAT, pat_msr_val);
__flush_tlb_all();
}
@@ -255,7 +257,7 @@ void __init pat_bp_init(void)
if (!cpu_feature_enabled(X86_FEATURE_PAT))
pat_disable("PAT not supported by the CPU.");
else
- rdmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+ rdmsrq(MSR_IA32_CR_PAT, pat_msr_val);
if (!pat_msr_val) {
pat_disable("PAT support disabled by the firmware.");
@@ -290,9 +292,8 @@ void __init pat_bp_init(void)
return;
}
- if ((c->x86_vendor == X86_VENDOR_INTEL) &&
- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ if ((c->x86_vfm >= INTEL_PENTIUM_PRO && c->x86_vfm <= INTEL_PENTIUM_M_DOTHAN) ||
+ (c->x86_vfm >= INTEL_P4_WILLAMETTE && c->x86_vfm <= INTEL_P4_CEDARMILL)) {
/*
* PAT support with the lower four entries. Intel Pentium 2,
* 3, M, and 4 are affected by PAT errata, which makes the
@@ -682,6 +683,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
/**
* pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
* of @pfn cannot be overridden by UC MTRR memory type.
+ * @pfn: The page frame number to check.
*
* Only to be called when PAT is enabled.
*
@@ -773,46 +775,27 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
return vma_prot;
}
-#ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+static inline void pgprot_set_cachemode(pgprot_t *prot, enum page_cache_mode pcm)
{
- return 1;
-}
-#else
-/* This check is needed to avoid cache aliasing when PAT is enabled */
-static inline int range_is_allowed(unsigned long pfn, unsigned long size)
-{
- u64 from = ((u64)pfn) << PAGE_SHIFT;
- u64 to = from + size;
- u64 cursor = from;
-
- if (!pat_enabled())
- return 1;
-
- while (cursor < to) {
- if (!devmem_is_allowed(pfn))
- return 0;
- cursor += PAGE_SIZE;
- pfn++;
- }
- return 1;
+ *prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) |
+ cachemode2protval(pcm));
}
-#endif /* CONFIG_STRICT_DEVMEM */
int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t *vma_prot)
{
enum page_cache_mode pcm = _PAGE_CACHE_MODE_WB;
+ if (!pat_enabled())
+ return 1;
+
if (!range_is_allowed(pfn, size))
return 0;
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;
- *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
return 1;
}
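The new pgprot_set_cachemode() helper above centralizes the clear-_PAGE_CACHE_MASK-then-OR pattern that was previously open-coded at each call site. A minimal usage sketch, with the wrapper function itself being hypothetical:

/* Force UC- on a mapping's protection bits, preserving all other bits. */
static pgprot_t example_uc_minus_prot(pgprot_t prot)
{
        pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_UC_MINUS);
        return prot;
}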
@@ -853,8 +836,7 @@ int memtype_kernel_map_sync(u64 base, unsigned long size,
* Reserved non RAM regions only and after successful memtype_reserve,
* this func also keeps identity mapping (if any) in sync with this new prot.
*/
-static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
- int strict_prot)
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot)
{
int is_ram = 0;
int ret;
@@ -880,9 +862,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
(unsigned long long)paddr,
(unsigned long long)(paddr + size - 1),
cattr_name(pcm));
- *vma_prot = __pgprot((pgprot_val(*vma_prot) &
- (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
}
return 0;
}
@@ -892,8 +872,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
return ret;
if (pcm != want_pcm) {
- if (strict_prot ||
- !is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
+ if (!is_new_memtype_allowed(paddr, size, want_pcm, pcm)) {
memtype_free(paddr, paddr + size);
pr_err("x86/PAT: %s:%d map pfn expected mapping type %s for [mem %#010Lx-%#010Lx], got %s\n",
current->comm, current->pid,
@@ -903,13 +882,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
cattr_name(pcm));
return -EINVAL;
}
- /*
- * We allow returning different type than the one requested in
- * non strict case.
- */
- *vma_prot = __pgprot((pgprot_val(*vma_prot) &
- (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ pgprot_set_cachemode(vma_prot, pcm);
}
if (memtype_kernel_map_sync(paddr, size, pcm) < 0) {
@@ -932,111 +905,14 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
-static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
- resource_size_t *phys)
-{
- struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
-
- if (follow_pfnmap_start(&args))
- return -EINVAL;
-
- /* Never return PFNs of anon folios in COW mappings. */
- if (!args.special) {
- follow_pfnmap_end(&args);
- return -EINVAL;
- }
-
- *prot = pgprot_val(args.pgprot);
- *phys = (resource_size_t)args.pfn << PAGE_SHIFT;
- follow_pfnmap_end(&args);
- return 0;
-}
-
-static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
- pgprot_t *pgprot)
-{
- unsigned long prot;
-
- VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
-
- /*
- * We need the starting PFN and cachemode used for track_pfn_remap()
- * that covered the whole VMA. For most mappings, we can obtain that
- * information from the page tables. For COW mappings, we might now
- * suddenly have anon folios mapped and follow_phys() will fail.
- *
- * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
- * detect the PFN. If we need the cachemode as well, we're out of luck
- * for now and have to fail fork().
- */
- if (!follow_phys(vma, &prot, paddr)) {
- if (pgprot)
- *pgprot = __pgprot(prot);
- return 0;
- }
- if (is_cow_mapping(vma->vm_flags)) {
- if (pgprot)
- return -EINVAL;
- *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
- return 0;
- }
- WARN_ON_ONCE(1);
- return -EINVAL;
-}
-
-/*
- * track_pfn_copy is called when vma that is covering the pfnmap gets
- * copied through copy_page_range().
- *
- * If the vma has a linear pfn mapping for the entire range, we get the prot
- * from pte and reserve the entire vma range with single reserve_pfn_range call.
- */
-int track_pfn_copy(struct vm_area_struct *vma)
-{
- resource_size_t paddr;
- unsigned long vma_size = vma->vm_end - vma->vm_start;
- pgprot_t pgprot;
-
- if (vma->vm_flags & VM_PAT) {
- if (get_pat_info(vma, &paddr, &pgprot))
- return -EINVAL;
- /* reserve the whole chunk covered by vma. */
- return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
- }
-
- return 0;
-}
-
-/*
- * prot is passed in as a parameter for the new mapping. If the vma has
- * a linear pfn mapping for the entire range, or no vma is provided,
- * reserve the entire pfn + size range with single reserve_pfn_range
- * call.
- */
-int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr, unsigned long size)
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot)
{
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
enum page_cache_mode pcm;
- /* reserve the whole chunk starting from paddr */
- if (!vma || (addr == vma->vm_start
- && size == (vma->vm_end - vma->vm_start))) {
- int ret;
-
- ret = reserve_pfn_range(paddr, size, prot, 0);
- if (ret == 0 && vma)
- vm_flags_set(vma, VM_PAT);
- return ret;
- }
-
if (!pat_enabled())
return 0;
- /*
- * For anything smaller than the vma size we set prot based on the
- * lookup.
- */
pcm = lookup_memtype(paddr);
/* Check memtype for the remaining pages */
@@ -1047,79 +923,35 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return -EINVAL;
}
- *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
-
+ pgprot_set_cachemode(prot, pcm);
return 0;
}
-void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
{
- enum page_cache_mode pcm;
-
- if (!pat_enabled())
- return;
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
- /* Set prot based on lookup */
- pcm = lookup_memtype(pfn_t_to_phys(pfn));
- *prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
- cachemode2protval(pcm));
+ return reserve_pfn_range(paddr, size, prot);
}
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
- */
-void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size, bool mm_wr_locked)
+void pfnmap_untrack(unsigned long pfn, unsigned long size)
{
- resource_size_t paddr;
+ const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
- if (vma && !(vma->vm_flags & VM_PAT))
- return;
-
- /* free the chunk starting from pfn or the whole chunk */
- paddr = (resource_size_t)pfn << PAGE_SHIFT;
- if (!paddr && !size) {
- if (get_pat_info(vma, &paddr, NULL))
- return;
- size = vma->vm_end - vma->vm_start;
- }
free_pfn_range(paddr, size);
- if (vma) {
- if (mm_wr_locked)
- vm_flags_clear(vma, VM_PAT);
- else
- __vm_flags_mod(vma, 0, VM_PAT);
- }
-}
-
-/*
- * untrack_pfn_clear is called if the following situation fits:
- *
- * 1) while mremapping a pfnmap for a new region, with the old vma after
- * its pfnmap page table has been removed. The new vma has a new pfnmap
- * to the same pfn & cache type with VM_PAT set.
- * 2) while duplicating vm area, the new vma fails to copy the pgtable from
- * old vma.
- */
-void untrack_pfn_clear(struct vm_area_struct *vma)
-{
- vm_flags_clear(vma, VM_PAT);
}
pgprot_t pgprot_writecombine(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) |
- cachemode2protval(_PAGE_CACHE_MODE_WC));
+ pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WC);
+ return prot;
}
EXPORT_SYMBOL_GPL(pgprot_writecombine);
pgprot_t pgprot_writethrough(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) |
- cachemode2protval(_PAGE_CACHE_MODE_WT));
+ pgprot_set_cachemode(&prot, _PAGE_CACHE_MODE_WT);
+ return prot;
}
EXPORT_SYMBOL_GPL(pgprot_writethrough);
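Taken together, the memtype.c changes above replace the VMA-based track_pfn_remap()/track_pfn_insert()/untrack_pfn() interface with the PFN-range helpers pfnmap_setup_cachemode(), pfnmap_track() and pfnmap_untrack(), which no longer inspect the VMA or VM_PAT. A sketch of how a mapping path is expected to pair them; example_map_pfn_range() and do_the_actual_mapping() are hypothetical stand-ins for the generic mm callers:

static int example_map_pfn_range(unsigned long pfn, unsigned long size,
                                 pgprot_t *prot)
{
        int ret;

        /* Reserve the memtype and fold the resulting cachemode into *prot. */
        ret = pfnmap_track(pfn, size, prot);
        if (ret)
                return ret;

        ret = do_the_actual_mapping(pfn, size, *prot);  /* hypothetical */
        if (ret)
                pfnmap_untrack(pfn, size);              /* drop the reservation */
        return ret;
}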
diff --git a/arch/x86/mm/pat/memtype_interval.c b/arch/x86/mm/pat/memtype_interval.c
index 645613d59942..e5844ed1311e 100644
--- a/arch/x86/mm/pat/memtype_interval.c
+++ b/arch/x86/mm/pat/memtype_interval.c
@@ -49,32 +49,6 @@ INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_match(u64 start, u64 end, int match_type)
-{
- struct memtype *entry_match;
-
- entry_match = interval_iter_first(&memtype_rbroot, start, end-1);
-
- while (entry_match != NULL && entry_match->start < end) {
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (entry_match->start == start) && (entry_match->end == end))
- return entry_match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (entry_match->start < start) && (entry_match->end == end))
- return entry_match;
-
- entry_match = interval_iter_next(entry_match, start, end-1);
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
static int memtype_check_conflict(u64 start, u64 end,
enum page_cache_mode reqtype,
enum page_cache_mode *newtype)
@@ -130,35 +104,16 @@ int memtype_check_insert(struct memtype *entry_new, enum page_cache_mode *ret_ty
struct memtype *memtype_erase(u64 start, u64 end)
{
- struct memtype *entry_old;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- entry_old = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
- if (!entry_old) {
- entry_old = memtype_match(start, end, MEMTYPE_END_MATCH);
- if (!entry_old)
- return ERR_PTR(-EINVAL);
+ struct memtype *entry = interval_iter_first(&memtype_rbroot, start, end - 1);
+
+ while (entry && entry->start < end) {
+ if (entry->start == start && entry->end == end) {
+ interval_remove(entry, &memtype_rbroot);
+ return entry;
+ }
+ entry = interval_iter_next(entry, start, end - 1);
}
-
- if (entry_old->start == start) {
- /* munmap: erase this node */
- interval_remove(entry_old, &memtype_rbroot);
- } else {
- /* mremap: update the end value of this node */
- interval_remove(entry_old, &memtype_rbroot);
- entry_old->end = start;
- interval_insert(entry_old, &memtype_rbroot);
-
- return NULL;
- }
-
- return entry_old;
+ return ERR_PTR(-EINVAL);
}
struct memtype *memtype_lookup(u64 addr)
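With the MEMTYPE_END_MATCH path removed, memtype_erase() above only removes an entry whose [start, end) exactly matches an earlier reservation; the mremap-style end-shrink case is gone. A sketch of the pairing this implies at the memtype_reserve()/memtype_free() level, assuming a non-RAM (e.g. MMIO) range and trimming error handling:

static int example_reserve_then_free(u64 start, u64 end)
{
        enum page_cache_mode pcm;
        int ret;

        ret = memtype_reserve(start, end, _PAGE_CACHE_MODE_WC, &pcm);
        if (ret)
                return ret;

        /* ... use the range ... */

        /* Must be the exact reserved range; partial frees no longer match. */
        return memtype_free(start, end);
}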
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 95bc50a8541c..8834c76f91c9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -32,8 +32,6 @@
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/memtype.h>
-#include <asm/hyperv-tlfs.h>
-#include <asm/mshyperv.h>
#include "../mm_internal.h"
@@ -75,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock);
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
+#define CPA_COLLAPSE 16 /* try to collapse large pages */
static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{
@@ -107,6 +106,18 @@ static void split_page_count(int level)
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
+static void collapse_page_count(int level)
+{
+ direct_pages_count[level]++;
+ if (system_state == SYSTEM_RUNNING) {
+ if (level == PG_LEVEL_2M)
+ count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);
+ else if (level == PG_LEVEL_1G)
+ count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);
+ }
+ direct_pages_count[level - 1] -= PTRS_PER_PTE;
+}
+
void arch_report_meminfo(struct seq_file *m)
{
seq_printf(m, "DirectMap4k: %8lu kB\n",
@@ -124,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m)
}
#else
static inline void split_page_count(int level) { }
+static inline void collapse_page_count(int level) { }
#endif
#ifdef CONFIG_X86_CPA_STATISTICS
@@ -213,14 +225,14 @@ within(unsigned long addr, unsigned long start, unsigned long end)
return addr >= start && addr < end;
}
+#ifdef CONFIG_X86_64
+
static inline int
within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
{
return addr >= start && addr <= end;
}
-#ifdef CONFIG_X86_64
-
/*
* The kernel image is mapped into two places in the virtual address space
* (addresses without KASLR, of course):
@@ -396,16 +408,49 @@ static void __cpa_flush_tlb(void *data)
flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
}
-static void cpa_flush(struct cpa_data *data, int cache)
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables);
+
+static void cpa_collapse_large_pages(struct cpa_data *cpa)
+{
+ unsigned long start, addr, end;
+ struct ptdesc *ptdesc, *tmp;
+ LIST_HEAD(pgtables);
+ int collapsed = 0;
+ int i;
+
+ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+ for (i = 0; i < cpa->numpages; i++)
+ collapsed += collapse_large_pages(__cpa_addr(cpa, i),
+ &pgtables);
+ } else {
+ addr = __cpa_addr(cpa, 0);
+ start = addr & PMD_MASK;
+ end = addr + PAGE_SIZE * cpa->numpages;
+
+ for (addr = start; within(addr, start, end); addr += PMD_SIZE)
+ collapsed += collapse_large_pages(addr, &pgtables);
+ }
+
+ if (!collapsed)
+ return;
+
+ flush_tlb_all();
+
+ list_for_each_entry_safe(ptdesc, tmp, &pgtables, pt_list) {
+ list_del(&ptdesc->pt_list);
+ __free_page(ptdesc_page(ptdesc));
+ }
+}
+
+static void cpa_flush(struct cpa_data *cpa, int cache)
{
- struct cpa_data *cpa = data;
unsigned int i;
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
- return;
+ goto collapse_large_pages;
}
if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
@@ -414,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
- return;
+ goto collapse_large_pages;
mb();
for (i = 0; i < cpa->numpages; i++) {
@@ -430,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
}
mb();
+
+collapse_large_pages:
+ if (cpa->flags & CPA_COLLAPSE)
+ cpa_collapse_large_pages(cpa);
}
static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@@ -840,7 +889,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
/* change init_mm */
set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
- if (!SHARED_KERNEL_PMD) {
+ {
struct page *page;
list_for_each_entry(page, &pgd_list, lru) {
@@ -1199,6 +1248,164 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
return 0;
}
+static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
+ struct list_head *pgtables)
+{
+ pmd_t _pmd, old_pmd;
+ pte_t *pte, first;
+ unsigned long pfn;
+ pgprot_t pgprot;
+ int i = 0;
+
+ if (!cpu_feature_enabled(X86_FEATURE_PSE))
+ return 0;
+
+ addr &= PMD_MASK;
+ pte = pte_offset_kernel(pmd, addr);
+ first = *pte;
+ pfn = pte_pfn(first);
+
+ /* Make sure alignment is suitable */
+ if (PFN_PHYS(pfn) & ~PMD_MASK)
+ return 0;
+
+ /* The page is 4k intentionally */
+ if (pte_flags(first) & _PAGE_KERNEL_4K)
+ return 0;
+
+ /* Check that the rest of the PTEs are compatible with the first one */
+ for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) {
+ pte_t entry = *pte;
+
+ if (!pte_present(entry))
+ return 0;
+ if (pte_flags(entry) != pte_flags(first))
+ return 0;
+ if (pte_pfn(entry) != pte_pfn(first) + i)
+ return 0;
+ }
+
+ old_pmd = *pmd;
+
+ /* Success: set up a large page */
+ pgprot = pgprot_4k_2_large(pte_pgprot(first));
+ pgprot_val(pgprot) |= _PAGE_PSE;
+ _pmd = pfn_pmd(pfn, pgprot);
+ set_pmd(pmd, _pmd);
+
+ /* Queue the page table to be freed after TLB flush */
+ list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ struct page *page;
+
+ /* Update all PGD tables to use the same large page */
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+ /* Something is wrong if the entries don't match */
+ if (WARN_ON(pmd_val(old_pmd) != pmd_val(*pmd)))
+ continue;
+ set_pmd(pmd, _pmd);
+ }
+ }
+
+ if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+ collapse_page_count(PG_LEVEL_2M);
+
+ return 1;
+}
+
+static int collapse_pud_page(pud_t *pud, unsigned long addr,
+ struct list_head *pgtables)
+{
+ unsigned long pfn;
+ pmd_t *pmd, first;
+ int i;
+
+ if (!direct_gbpages)
+ return 0;
+
+ addr &= PUD_MASK;
+ pmd = pmd_offset(pud, addr);
+ first = *pmd;
+
+ /*
+ * To restore the PUD page, all PMD entries must be large and
+ * suitably aligned.
+ */
+ pfn = pmd_pfn(first);
+ if (!pmd_leaf(first) || (PFN_PHYS(pfn) & ~PUD_MASK))
+ return 0;
+
+ /*
+ * To restore the PUD page, all following PMDs must be compatible with the
+ * first one.
+ */
+ for (i = 1, pmd++; i < PTRS_PER_PMD; i++, pmd++) {
+ pmd_t entry = *pmd;
+
+ if (!pmd_present(entry) || !pmd_leaf(entry))
+ return 0;
+ if (pmd_flags(entry) != pmd_flags(first))
+ return 0;
+ if (pmd_pfn(entry) != pmd_pfn(first) + i * PTRS_PER_PTE)
+ return 0;
+ }
+
+ /* Restore PUD page and queue page table to be freed after TLB flush */
+ list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables);
+ set_pud(pud, pfn_pud(pfn, pmd_pgprot(first)));
+
+ if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
+ collapse_page_count(PG_LEVEL_1G);
+
+ return 1;
+}
+
+/*
+ * Collapse PMD and PUD pages in the kernel mapping around the address where
+ * possible.
+ *
+ * Caller must flush TLB and free page tables queued on the list before
+ * touching the new entries. CPU must not see TLB entries of different size
+ * with different attributes.
+ */
+static int collapse_large_pages(unsigned long addr, struct list_head *pgtables)
+{
+ int collapsed = 0;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ addr &= PMD_MASK;
+
+ spin_lock(&pgd_lock);
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ goto out;
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ goto out;
+ pud = pud_offset(p4d, addr);
+ if (!pud_present(*pud) || pud_leaf(*pud))
+ goto out;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd) || pmd_leaf(*pmd))
+ goto out;
+
+ collapsed = collapse_pmd_page(pmd, addr, pgtables);
+ if (collapsed)
+ collapsed += collapse_pud_page(pud, addr, pgtables);
+
+out:
+ spin_unlock(&pgd_lock);
+ return collapsed;
+}
+
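collapse_pmd_page() above only restores a 2M mapping when every one of the PTRS_PER_PTE entries is present, carries the same flags as the first entry, and maps physically consecutive PFNs. A condensed sketch of just that compatibility test, pulled out of its kernel context for clarity:

static bool ptes_form_large_page(pte_t *pte)
{
        pte_t first = pte[0];
        int i;

        for (i = 1; i < PTRS_PER_PTE; i++) {
                if (!pte_present(pte[i]))
                        return false;
                if (pte_flags(pte[i]) != pte_flags(first))
                        return false;
                if (pte_pfn(pte[i]) != pte_pfn(first) + i)
                        return false;
        }
        return true;
}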
static bool try_to_free_pte_page(pte_t *pte)
{
int i;
@@ -2083,6 +2290,7 @@ int set_mce_nospec(unsigned long pfn)
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
return rc;
}
+EXPORT_SYMBOL_GPL(set_mce_nospec);
/* Restore full speculative operation to the pfn. */
int clear_mce_nospec(unsigned long pfn)
@@ -2122,7 +2330,8 @@ int set_memory_rox(unsigned long addr, int numpages)
if (__supported_pte_mask & _PAGE_NX)
clr.pgprot |= _PAGE_NX;
- return change_page_attr_clear(&addr, numpages, clr, 0);
+ return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0,
+ CPA_COLLAPSE, NULL);
}
int set_memory_rw(unsigned long addr, int numpages)
@@ -2149,7 +2358,8 @@ int set_memory_p(unsigned long addr, int numpages)
int set_memory_4k(unsigned long addr, int numpages)
{
- return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+ return change_page_attr_set_clr(&addr, numpages,
+ __pgprot(_PAGE_KERNEL_4K),
__pgprot(0), 1, 0, NULL);
}
@@ -2422,7 +2632,7 @@ static int __set_pages_np(struct page *page, int numpages)
.pgd = NULL,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
.flags = CPA_NO_CHECK_ALIAS };
/*
@@ -2509,7 +2719,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
+ .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW|_PAGE_DIRTY)),
.flags = CPA_NO_CHECK_ALIAS,
};
@@ -2552,7 +2762,7 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
.pgd = pgd,
.numpages = numpages,
.mask_set = __pgprot(0),
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY),
.flags = CPA_NO_CHECK_ALIAS,
};