summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/extable.c6
-rw-r--r--arch/x86/mm/fault.c25
-rw-r--r--arch/x86/mm/init_32.c134
-rw-r--r--arch/x86/mm/ioremap.c21
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/pageattr.c15
-rw-r--r--arch/x86/mm/pat.c114
-rw-r--r--arch/x86/mm/tlb.c4
10 files changed, 187 insertions, 140 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9f05157220f5..2b938a384910 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,7 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o gup.o
-obj-$(CONFIG_X86_SMP) += tlb.o
+obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 7e8db53528a7..61b41ca3b5a2 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -23,6 +23,12 @@ int fixup_exception(struct pt_regs *regs)
fixup = search_exception_tables(regs->ip);
if (fixup) {
+ /* If fixup is less than 16, it means uaccess error */
+ if (fixup->fixup < 16) {
+ current_thread_info()->uaccess_err = -EFAULT;
+ regs->ip += fixup->fixup;
+ return 1;
+ }
regs->ip = fixup->fixup;
return 1;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 976b5a72ec30..29644175490f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -420,7 +420,6 @@ static noinline void pgtable_bad(struct pt_regs *regs,
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
tsk->comm, address);
dump_pagetable(address);
- tsk = current;
tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
@@ -783,6 +782,15 @@ static inline int access_error(unsigned long error_code, int write,
return 0;
}
+static int fault_in_kernel_space(unsigned long address)
+{
+#ifdef CONFIG_X86_32
+ return address >= TASK_SIZE;
+#else /* !CONFIG_X86_32 */
+ return address >= TASK_SIZE64;
+#endif /* CONFIG_X86_32 */
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -807,7 +815,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* get the address */
address = read_cr2();
- if (unlikely(notify_page_fault(regs)))
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -824,11 +831,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
-#ifdef CONFIG_X86_32
- if (unlikely(address >= TASK_SIZE)) {
-#else
- if (unlikely(address >= TASK_SIZE64)) {
-#endif
+ if (unlikely(fault_in_kernel_space(address))) {
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
vmalloc_fault(address) >= 0)
return;
@@ -849,7 +852,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
}
/* kprobes don't want to hook the spurious faults. */
- if (notify_page_fault(regs))
+ if (unlikely(notify_page_fault(regs)))
return;
/*
* It's safe to allow irq's after cr2 has been saved and the
@@ -901,6 +904,12 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
return;
}
down_read(&mm->mmap_sem);
+ } else {
+ /*
+ * The above down_read_trylock() might have succeeded in which
+ * case we'll have missed the might_sleep() from down_read().
+ */
+ might_sleep();
}
vma = find_vma(mm, address);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 00263bf07a88..06708ee94aa4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -674,75 +674,97 @@ static int __init parse_highmem(char *arg)
}
early_param("highmem", parse_highmem);
+#define MSG_HIGHMEM_TOO_BIG \
+ "highmem size (%luMB) is bigger than pages available (%luMB)!\n"
+
+#define MSG_LOWMEM_TOO_SMALL \
+ "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
/*
- * Determine low and high memory ranges:
+ * All of RAM fits into lowmem - but if user wants highmem
+ * artificially via the highmem=x boot parameter then create
+ * it:
*/
-void __init find_low_pfn_range(void)
+void __init lowmem_pfn_init(void)
{
- /* it could update max_pfn */
-
/* max_low_pfn is 0, we already have early_res support */
-
max_low_pfn = max_pfn;
- if (max_low_pfn > MAXMEM_PFN) {
- if (highmem_pages == -1)
- highmem_pages = max_pfn - MAXMEM_PFN;
- if (highmem_pages + MAXMEM_PFN < max_pfn)
- max_pfn = MAXMEM_PFN + highmem_pages;
- if (highmem_pages + MAXMEM_PFN > max_pfn) {
- printk(KERN_WARNING "only %luMB highmem pages "
- "available, ignoring highmem size of %uMB.\n",
- pages_to_mb(max_pfn - MAXMEM_PFN),
+
+ if (highmem_pages == -1)
+ highmem_pages = 0;
+#ifdef CONFIG_HIGHMEM
+ if (highmem_pages >= max_pfn) {
+ printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
+ pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
+ highmem_pages = 0;
+ }
+ if (highmem_pages) {
+ if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
+ printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
pages_to_mb(highmem_pages));
highmem_pages = 0;
}
- max_low_pfn = MAXMEM_PFN;
+ max_low_pfn -= highmem_pages;
+ }
+#else
+ if (highmem_pages)
+ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
+#endif
+}
+
+#define MSG_HIGHMEM_TOO_SMALL \
+ "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
+
+#define MSG_HIGHMEM_TRIMMED \
+ "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
+/*
+ * We have more RAM than fits into lowmem - we try to put it into
+ * highmem, also taking the highmem=x boot parameter into account:
+ */
+void __init highmem_pfn_init(void)
+{
+ max_low_pfn = MAXMEM_PFN;
+
+ if (highmem_pages == -1)
+ highmem_pages = max_pfn - MAXMEM_PFN;
+
+ if (highmem_pages + MAXMEM_PFN < max_pfn)
+ max_pfn = MAXMEM_PFN + highmem_pages;
+
+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
+ printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
+ pages_to_mb(max_pfn - MAXMEM_PFN),
+ pages_to_mb(highmem_pages));
+ highmem_pages = 0;
+ }
#ifndef CONFIG_HIGHMEM
- /* Maximum memory usable is what is directly addressable */
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
- MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING
- "Use a HIGHMEM64G enabled kernel.\n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
- max_pfn = MAXMEM_PFN;
+ /* Maximum memory usable is what is directly addressable */
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
+ if (max_pfn > MAX_NONPAE_PFN)
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
+ else
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+ max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_HIGHMEM64G
- if (max_pfn > MAX_NONPAE_PFN) {
- max_pfn = MAX_NONPAE_PFN;
- printk(KERN_WARNING "Warning only 4GB will be used."
- "Use a HIGHMEM64G enabled kernel.\n");
- }
+ if (max_pfn > MAX_NONPAE_PFN) {
+ max_pfn = MAX_NONPAE_PFN;
+ printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
+ }
#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
- } else {
- if (highmem_pages == -1)
- highmem_pages = 0;
-#ifdef CONFIG_HIGHMEM
- if (highmem_pages >= max_pfn) {
- printk(KERN_ERR "highmem size specified (%uMB) is "
- "bigger than pages available (%luMB)!.\n",
- pages_to_mb(highmem_pages),
- pages_to_mb(max_pfn));
- highmem_pages = 0;
- }
- if (highmem_pages) {
- if (max_low_pfn - highmem_pages <
- 64*1024*1024/PAGE_SIZE){
- printk(KERN_ERR "highmem size %uMB results in "
- "smaller than 64MB lowmem, ignoring it.\n"
- , pages_to_mb(highmem_pages));
- highmem_pages = 0;
- }
- max_low_pfn -= highmem_pages;
- }
-#else
- if (highmem_pages)
- printk(KERN_ERR "ignoring highmem size on non-highmem"
- " kernel!\n");
-#endif
- }
+}
+
+/*
+ * Determine low and high memory ranges:
+ */
+void __init find_low_pfn_range(void)
+{
+ /* it could update max_pfn */
+
+ if (max_pfn <= MAXMEM_PFN)
+ lowmem_pfn_init();
+ else
+ highmem_pfn_init();
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..433f7bd4648a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
return 0;
}
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
-
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- if (page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
- }
-
- return ram_page;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -367,7 +348,7 @@ EXPORT_SYMBOL(ioremap_nocache);
*
* Must be freed with iounmap.
*/
-void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
if (pat_enabled)
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 56fe7124fbec..165829600566 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -4,7 +4,7 @@
* Based on code by Ingo Molnar and Andi Kleen, copyrighted
* as follows:
*
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * Copyright 2003-2009 Red Hat Inc.
* All Rights Reserved.
* Copyright 2005 Andi Kleen, SUSE Labs.
* Copyright 2007 Jiri Kosina, SUSE Labs.
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 08d140fbc31b..deb1c1ab7868 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node)
}
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
dump_stack();
return;
@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu)
if (early_per_cpu_ptr(x86_cpu_to_node_map))
return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- if (!per_cpu_offset(cpu)) {
+ if (!cpu_possible(cpu)) {
printk(KERN_WARNING
"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
dump_stack();
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..8ca0d8566fc8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
vm_unmap_aliases();
+ /*
+ * If we're called with lazy mmu updates enabled, the
+ * in-memory pte state may be stale. Flush pending updates to
+ * bring them up to date.
+ */
+ arch_flush_lazy_mmu_mode();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
} else
cpa_flush_all(cache);
+ /*
+ * If we've been called with lazy mmu updates enabled, then
+ * make sure that everything gets flushed out before we
+ * return.
+ */
+ arch_flush_lazy_mmu_mode();
+
out:
return ret;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..05f9aef6818a 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -30,7 +30,7 @@
#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;
-void __cpuinit pat_disable(char *reason)
+void __cpuinit pat_disable(const char *reason)
{
pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
@@ -42,6 +42,11 @@ static int __init nopat(char *str)
return 0;
}
early_param("nopat", nopat);
+#else
+static inline void pat_disable(const char *reason)
+{
+ (void)reason;
+}
#endif
@@ -78,16 +83,20 @@ void pat_init(void)
if (!pat_enabled)
return;
- /* Paranoia check. */
- if (!cpu_has_pat && boot_pat_state) {
- /*
- * If this happens we are on a secondary CPU, but
- * switched to PAT on the boot CPU. We have no way to
- * undo PAT.
- */
- printk(KERN_ERR "PAT enabled, "
- "but not supported by secondary CPU\n");
- BUG();
+ if (!cpu_has_pat) {
+ if (!boot_pat_state) {
+ pat_disable("PAT not supported by CPU.");
+ return;
+ } else {
+ /*
+ * If this happens we are on a secondary CPU, but
+ * switched to PAT on the boot CPU. We have no way to
+ * undo PAT.
+ */
+ printk(KERN_ERR "PAT enabled, "
+ "but not supported by secondary CPU\n");
+ BUG();
+ }
}
/* Set PWT to Write-Combining. All other bits stay the same */
@@ -211,6 +220,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
static struct memtype *cached_entry;
static u64 cached_start;
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+ int ram_page = 0, not_rampage = 0;
+ unsigned long page_nr;
+
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+ ++page_nr) {
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+ page_is_ram(page_nr))
+ ram_page = 1;
+ else
+ not_rampage = 1;
+
+ if (ram_page == not_rampage)
+ return -1;
+ }
+
+ return ram_page;
+}
+
/*
* For RAM pages, mark the pages as non WB memory type using
* PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +372,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (new_type)
*new_type = actual_type;
- /*
- * For legacy reasons, some parts of the physical address range in the
- * legacy 1MB region is treated as non-RAM (even when listed as RAM in
- * the e820 tables). So we will track the memory attributes of this
- * legacy 1MB region using the linear memtype_list always.
- */
- if (end >= ISA_END_ADDRESS) {
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type,
- new_type);
- else if (is_range_ram < 0)
- return -EINVAL;
- }
+ is_range_ram = pat_pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return reserve_ram_pages_type(start, end, req_type,
+ new_type);
+ else if (is_range_ram < 0)
+ return -EINVAL;
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -446,19 +474,11 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
- /*
- * For legacy reasons, some parts of the physical address range in the
- * legacy 1MB region is treated as non-RAM (even when listed as RAM in
- * the e820 tables). So we will track the memory attributes of this
- * legacy 1MB region using the linear memtype_list always.
- */
- if (end >= ISA_END_ADDRESS) {
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
- return -EINVAL;
- }
+ is_range_ram = pat_pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ else if (is_range_ram < 0)
+ return -EINVAL;
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +646,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
- if (is_ram != 0) {
- /*
- * For mapping RAM pages, drivers need to call
- * set_memory_[uc|wc|wb] directly, for reserve and free, before
- * setting up the PTE.
- */
- WARN_ON_ONCE(1);
- return 0;
- }
+ /*
+ * reserve_pfn_range() doesn't support RAM pages.
+ */
+ if (is_ram != 0)
+ return -EINVAL;
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret)
@@ -693,7 +709,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
{
int is_ram;
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
free_memtype(paddr, paddr + size);
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 72a6d4ebe34d..14c5af4d11e6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,7 +14,7 @@
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };
-#include <mach_ipi.h>
+#include <asm/genapic.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
@@ -196,7 +196,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(to_cpumask(f->flush_cpumask),
+ apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
INVALIDATE_TLB_VECTOR_START + sender);
while (!cpumask_empty(to_cpumask(f->flush_cpumask)))