summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init_32.c48
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/iomap_32.c10
-rw-r--r--arch/x86/mm/ioremap.c25
-rw-r--r--arch/x86/mm/pageattr.c49
-rw-r--r--arch/x86/mm/pat.c43
7 files changed, 126 insertions, 59 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..c76ef1d701c9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
si_code = SEGV_MAPERR;
- if (notify_page_fault(regs))
- return;
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (spurious_fault(address, error_code))
return;
+ /* kprobes don't want to hook the spurious faults. */
+ if (notify_page_fault(regs))
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
@@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
goto bad_area_nosemaphore;
}
+ /* kprobes don't want to hook the spurious faults. */
+ if (notify_page_fault(regs))
+ return;
/*
* It's safe to allow irq's after cr2 has been saved and the
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 88f1b10de3be..2cef05074413 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -138,6 +138,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
return pte_offset_kernel(pmd, 0);
}
+static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
+ unsigned long vaddr, pte_t *lastpte)
+{
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Something (early fixmap) may already have put a pte
+ * page here, which causes the page table allocation
+ * to become nonlinear. Attempt to fix it, and if it
+ * is still nonlinear then we have to bug.
+ */
+ int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+
+ if (pmd_idx_kmap_begin != pmd_idx_kmap_end
+ && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
+ && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
+ && ((__pa(pte) >> PAGE_SHIFT) < table_start
+ || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
+ pte_t *newpte;
+ int i;
+
+ BUG_ON(after_init_bootmem);
+ newpte = alloc_low_page();
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(newpte + i, pte[i]);
+
+ paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
+ set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
+ BUG_ON(newpte != pte_offset_kernel(pmd, 0));
+ __flush_tlb_all();
+
+ paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
+ pte = newpte;
+ }
+ BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
+ && vaddr > fix_to_virt(FIX_KMAP_END)
+ && lastpte && lastpte + PTRS_PER_PTE != pte);
+#endif
+ return pte;
+}
+
/*
* This function initializes a certain range of kernel virtual memory
* with new bootmem page tables, everywhere page tables are missing in
@@ -154,6 +195,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
unsigned long vaddr;
pgd_t *pgd;
pmd_t *pmd;
+ pte_t *pte = NULL;
vaddr = start;
pgd_idx = pgd_index(vaddr);
@@ -165,7 +207,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
pmd = pmd + pmd_index(vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
pmd++, pmd_idx++) {
- one_page_table_init(pmd);
+ pte = page_table_kmap_check(one_page_table_init(pmd),
+ pmd, vaddr, pte);
vaddr += PMD_SIZE;
}
@@ -508,7 +551,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
* Fixed mappings, only the page table structure has to be
* created - mappings will be set by set_fixmap():
*/
- early_ioremap_clear();
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
page_table_range_init(vaddr, end, pgd_base);
@@ -801,7 +843,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse)
tables += PAGE_ALIGN(ptes * sizeof(pte_t));
/* for fixmap */
- tables += PAGE_SIZE * 2;
+ tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
/*
* RED-PEN putting page tables only on node 0 could
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 23f68e77ad1f..e6d36b490250 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -596,7 +596,7 @@ static void __init init_gbpages(void)
direct_gbpages = 0;
}
-static unsigned long __init kernel_physical_mapping_init(unsigned long start,
+static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask)
{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index d0151d8ce452..ca53224fc56c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -17,6 +17,7 @@
*/
#include <asm/iomap.h>
+#include <asm/pat.h>
#include <linux/module.h>
/* Map 'pfn' using fixed map 'type' and protections 'prot'
@@ -29,6 +30,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
pagefault_disable();
+ /*
+ * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
+ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
+ * MTRR is UC or WC. UC_MINUS gets the real intention, of the
+ * user, which is "WC if the MTRR is WC, UC if you can't do that."
+ */
+ if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
+ prot = PAGE_KERNEL_UC_MINUS;
+
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bd85d42819e1..af750ab973b6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -557,34 +557,9 @@ void __init early_ioremap_init(void)
}
}
-void __init early_ioremap_clear(void)
-{
- pmd_t *pmd;
-
- if (early_ioremap_debug)
- printk(KERN_INFO "early_ioremap_clear()\n");
-
- pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
- pmd_clear(pmd);
- paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
- __flush_tlb_all();
-}
-
void __init early_ioremap_reset(void)
{
- enum fixed_addresses idx;
- unsigned long addr, phys;
- pte_t *pte;
-
after_paging_init = 1;
- for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
- addr = fix_to_virt(idx);
- pte = early_ioremap_pte(addr);
- if (pte_present(*pte)) {
- phys = pte_val(*pte) & PAGE_MASK;
- set_fixmap(idx, phys);
- }
- }
}
static void __init __early_set_fixmap(enum fixed_addresses idx,
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f26..84ba74820ad6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -534,6 +534,36 @@ out_unlock:
return 0;
}
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
+ int primary)
+{
+ /*
+ * Ignore all non primary paths.
+ */
+ if (!primary)
+ return 0;
+
+ /*
+ * Ignore the NULL PTE for kernel identity mapping, as it is expected
+ * to have holes.
+ * Also set numpages to '1' indicating that we processed cpa req for
+ * one virtual address page and its pfn. TBD: numpages can be set based
+ * on the initial value and the level returned by lookup_address().
+ */
+ if (within(vaddr, PAGE_OFFSET,
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+ cpa->numpages = 1;
+ cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
+ return 0;
+ } else {
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
+ "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
+ *cpa->vaddr);
+
+ return -EFAULT;
+ }
+}
+
static int __change_page_attr(struct cpa_data *cpa, int primary)
{
unsigned long address;
@@ -549,17 +579,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
- return 0;
+ return __cpa_process_fault(cpa, address, primary);
old_pte = *kpte;
- if (!pte_val(old_pte)) {
- if (!primary)
- return 0;
- WARN(1, KERN_WARNING "CPA: called for zero pte. "
- "vaddr = %lx cpa->vaddr = %lx\n", address,
- *cpa->vaddr);
- return -EINVAL;
- }
+ if (!pte_val(old_pte))
+ return __cpa_process_fault(cpa, address, primary);
if (level == PG_LEVEL_4K) {
pte_t new_pte;
@@ -657,12 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
vaddr = *cpa->vaddr;
if (!(within(vaddr, PAGE_OFFSET,
- PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
-#ifdef CONFIG_X86_64
- || within(vaddr, PAGE_OFFSET + (1UL<<32),
- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
-#endif
- )) {
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 8b08fb955274..7b61036427df 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -333,11 +333,23 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
req_type & _PAGE_CACHE_MASK);
}
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type, new_type);
- else if (is_range_ram < 0)
- return -EINVAL;
+ if (new_type)
+ *new_type = actual_type;
+
+ /*
+ * For legacy reasons, some parts of the physical address range in the
+ * legacy 1MB region is treated as non-RAM (even when listed as RAM in
+ * the e820 tables). So we will track the memory attributes of this
+ * legacy 1MB region using the linear memtype_list always.
+ */
+ if (end >= ISA_END_ADDRESS) {
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return reserve_ram_pages_type(start, end, req_type,
+ new_type);
+ else if (is_range_ram < 0)
+ return -EINVAL;
+ }
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -347,9 +359,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
new->end = end;
new->type = actual_type;
- if (new_type)
- *new_type = actual_type;
-
spin_lock(&memtype_lock);
if (cached_entry && start >= cached_start)
@@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
- return -EINVAL;
+ /*
+ * For legacy reasons, some parts of the physical address range in the
+ * legacy 1MB region is treated as non-RAM (even when listed as RAM in
+ * the e820 tables). So we will track the memory attributes of this
+ * legacy 1MB region using the linear memtype_list always.
+ */
+ if (end >= ISA_END_ADDRESS) {
+ is_range_ram = pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ else if (is_range_ram < 0)
+ return -EINVAL;
+ }
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {