Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     |  3
-rw-r--r--  mm/hugetlb.c         | 54
-rw-r--r--  mm/internal.h        | 29
-rw-r--r--  mm/memory.c          | 70
-rw-r--r--  mm/memory_hotplug.c  | 12
-rw-r--r--  mm/mempolicy.c       | 18
-rw-r--r--  mm/migrate.c         | 66
-rw-r--r--  mm/mlock.c           | 18
-rw-r--r--  mm/mmap.c            |  2
-rw-r--r--  mm/oom_kill.c        |  3
-rw-r--r--  mm/page_alloc.c      | 32
-rw-r--r--  mm/page_cgroup.c     | 63
-rw-r--r--  mm/page_isolation.c  |  5
-rw-r--r--  mm/slob.c            |  2
-rw-r--r--  mm/slub.c            |  8
-rw-r--r--  mm/sparse-vmemmap.c  |  2
-rw-r--r--  mm/sparse.c          |  2
-rw-r--r--  mm/swap.c            | 20
-rw-r--r--  mm/swapfile.c        |  9
-rw-r--r--  mm/vmalloc.c         | 52
-rw-r--r--  mm/vmscan.c          | 46
21 files changed, 344 insertions(+), 172 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2e574dbc300..801c08b046e6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -176,6 +176,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
int ret = 0;
struct device *dev;
+ if (bdi->dev) /* The driver needs to use separate queues per device */
+ goto exit;
+
va_start(args, fmt);
dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
va_end(args);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 421aee99b84a..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
return 0;
}
+static void clear_gigantic_page(struct page *page,
+ unsigned long addr, unsigned long sz)
+{
+ int i;
+ struct page *p = page;
+
+ might_sleep();
+ for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+ cond_resched();
+ clear_user_highpage(p, addr + i * PAGE_SIZE);
+ }
+}
static void clear_huge_page(struct page *page,
unsigned long addr, unsigned long sz)
{
int i;
+ if (unlikely(sz > MAX_ORDER_NR_PAGES))
+ return clear_gigantic_page(page, addr, sz);
+
might_sleep();
for (i = 0; i < sz/PAGE_SIZE; i++) {
cond_resched();
@@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page,
}
}
+static void copy_gigantic_page(struct page *dst, struct page *src,
+ unsigned long addr, struct vm_area_struct *vma)
+{
+ int i;
+ struct hstate *h = hstate_vma(vma);
+ struct page *dst_base = dst;
+ struct page *src_base = src;
+ might_sleep();
+ for (i = 0; i < pages_per_huge_page(h); ) {
+ cond_resched();
+ copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+ i++;
+ dst = mem_map_next(dst, dst_base, i);
+ src = mem_map_next(src, src_base, i);
+ }
+}
static void copy_huge_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma)
{
int i;
struct hstate *h = hstate_vma(vma);
+ if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
+ return copy_gigantic_page(dst, src, addr, vma);
+
might_sleep();
for (i = 0; i < pages_per_huge_page(h); i++) {
cond_resched();
@@ -456,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
+ VM_BUG_ON(h->order >= MAX_ORDER);
+
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -970,6 +1007,14 @@ found:
return 1;
}
+static void prep_compound_huge_page(struct page *page, int order)
+{
+ if (unlikely(order > (MAX_ORDER - 1)))
+ prep_compound_gigantic_page(page, order);
+ else
+ prep_compound_page(page, order);
+}
+
/* Put bootmem huge pages into the standard lists after mem_map is up */
static void __init gather_bootmem_prealloc(void)
{
@@ -980,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
struct hstate *h = m->hstate;
__ClearPageReserved(page);
WARN_ON(page_count(page) != 1);
- prep_compound_page(page, h->order);
+ prep_compound_huge_page(page, h->order);
prep_new_huge_page(h, page, page_to_nid(page));
}
}
@@ -1751,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *page, unsigned long address)
{
+ struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
struct address_space *mapping;
struct prio_tree_iter iter;
@@ -1760,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* vm_pgoff is in PAGE_SIZE units, hence the different calculation
* from page cache lookup which is in HPAGE_SIZE units.
*/
- address = address & huge_page_mask(hstate_vma(vma));
+ address = address & huge_page_mask(h);
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+ (vma->vm_pgoff >> PAGE_SHIFT);
mapping = (struct address_space *)page_private(page);
@@ -1779,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
unmap_hugepage_range(iter_vma,
- address, address + HPAGE_SIZE,
+ address, address + huge_page_size(h),
page);
}
@@ -2130,7 +2176,7 @@ same_page:
if (zeropage_ok)
pages[i] = ZERO_PAGE(0);
else
- pages[i] = page + pfn_offset;
+ pages[i] = mem_map_offset(page, pfn_offset);
get_page(pages[i]);
}
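
The clear/copy loops above call cond_resched() once per 4 KiB subpage so that zeroing or copying a huge page (potentially gigabytes for a gigantic page) does not monopolize the CPU. A minimal userspace sketch of the same long-loop-with-yield-points pattern, with sched_yield() standing in for cond_resched() and a hypothetical 4 KiB subpage size:

#include <sched.h>
#include <string.h>

#define SUBPAGE 4096UL

/* Clear a large buffer one subpage at a time (size assumed to be a
 * multiple of SUBPAGE), yielding between chunks so other runnable
 * tasks are not starved (analogue of cond_resched()). */
static void clear_large(char *buf, unsigned long size)
{
	unsigned long off;

	for (off = 0; off < size; off += SUBPAGE) {
		sched_yield();			/* one yield point per subpage */
		memset(buf + off, 0, SUBPAGE);
	}
}
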
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..13333bc2eb68 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long floor, unsigned long ceiling);
extern void prep_compound_page(struct page *page, unsigned long order);
+extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
static inline void set_page_count(struct page *page, int v)
{
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { }
#endif /* CONFIG_UNEVICTABLE_LRU */
/*
+ * Return the mem_map entry representing the 'offset' subpage within
+ * the maximally aligned gigantic page 'base'. Handle any discontiguity
+ * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
+ */
+static inline struct page *mem_map_offset(struct page *base, int offset)
+{
+ if (unlikely(offset >= MAX_ORDER_NR_PAGES))
+ return pfn_to_page(page_to_pfn(base) + offset);
+ return base + offset;
+}
+
+/*
+ * Iterator over all subpages within the maximally aligned gigantic
+ * page 'base'. Handle any discontiguity in the mem_map.
+ */
+static inline struct page *mem_map_next(struct page *iter,
+ struct page *base, int offset)
+{
+ if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
+ unsigned long pfn = page_to_pfn(base) + offset;
+ if (!pfn_valid(pfn))
+ return NULL;
+ return pfn_to_page(pfn);
+ }
+ return iter + 1;
+}
+
+/*
* FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
* so all functions starting at paging_init should be marked __init
* in those cases. SPARSEMEM, however, allows for memory hotplug,
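
The two helpers exist because, outside of flat or vmemmap layouts, the mem_map backing struct page is only guaranteed contiguous within a MAX_ORDER-sized block, so plain pointer arithmetic like base + offset is invalid once a gigantic page crosses a block boundary. A hedged userspace model of the same indexing problem (block size and table layout hypothetical):

#define BLOCK_NR 64	/* entries per contiguous block (hypothetical) */

struct page_stub { unsigned long pfn; };

/* blocks[] models the per-section mem_map chunks: entries are contiguous
 * inside one block, but the blocks themselves are separate allocations. */
static struct page_stub *blocks[16];

/* Analogue of mem_map_offset(): pointer arithmetic is only safe within
 * one block, so the entry must be re-resolved through the block table
 * rather than computed as base + offset. */
static struct page_stub *stub_offset(unsigned long base_idx, int offset)
{
	unsigned long idx = base_idx + offset;

	return &blocks[idx / BLOCK_NR][idx % BLOCK_NR];
}
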
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -669,6 +669,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+ if (unlikely(is_pfn_mapping(vma))) {
+ /*
+		 * We do not free on error cases below, as remove_vma
+		 * gets called on error from a higher-level routine.
+ */
+ ret = track_pfn_vma_copy(vma);
+ if (ret)
+ return ret;
+ }
+
/*
* We need to invalidate the secondary MMU mappings only when
* there could be a permission downgrade on the ptes of the
@@ -915,6 +925,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
+ if (unlikely(is_pfn_mapping(vma)))
+ untrack_pfn_vma(vma, 0, 0);
+
while (start != end) {
if (!tlb_start_valid) {
tlb_start = start;
@@ -1430,6 +1443,7 @@ out:
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
+ int ret;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
@@ -1444,7 +1458,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+ if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+ return -EINVAL;
+
+ ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+
+ if (ret)
+ untrack_pfn_vma(vma, pfn, PAGE_SIZE);
+
+ return ret;
}
EXPORT_SYMBOL(vm_insert_pfn);
@@ -1575,14 +1597,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
* behaviour that some programs depend on. We mark the "original"
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
*/
- if (is_cow_mapping(vma->vm_flags)) {
- if (addr != vma->vm_start || end != vma->vm_end)
- return -EINVAL;
+ if (addr == vma->vm_start && end == vma->vm_end)
vma->vm_pgoff = pfn;
- }
+ else if (is_cow_mapping(vma->vm_flags))
+ return -EINVAL;
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+ err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
+ if (err)
+ return -EINVAL;
+
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
pgd = pgd_offset(mm, addr);
@@ -1594,6 +1619,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
if (err)
break;
} while (pgd++, addr = next, addr != end);
+
+ if (err)
+ untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
@@ -2865,9 +2894,9 @@ int in_gate_area_no_task(unsigned long addr)
#endif /* __HAVE_ARCH_GATE_AREA */
#ifdef CONFIG_HAVE_IOREMAP_PROT
-static resource_size_t follow_phys(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned long *prot)
+int follow_phys(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags,
+ unsigned long *prot, resource_size_t *phys)
{
pgd_t *pgd;
pud_t *pud;
@@ -2876,24 +2905,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
spinlock_t *ptl;
resource_size_t phys_addr = 0;
struct mm_struct *mm = vma->vm_mm;
+ int ret = -EINVAL;
- VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out;
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto no_page_table;
+ goto out;
pud = pud_offset(pgd, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- goto no_page_table;
+ goto out;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto no_page_table;
+ goto out;
/* We cannot handle huge page PFN maps. Luckily they don't exist. */
if (pmd_huge(*pmd))
- goto no_page_table;
+ goto out;
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
@@ -2908,13 +2939,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
*prot = pgprot_val(pte_pgprot(pte));
+ *phys = phys_addr;
+ ret = 0;
unlock:
pte_unmap_unlock(ptep, ptl);
out:
- return phys_addr;
-no_page_table:
- return 0;
+ return ret;
}
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -2925,12 +2956,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *maddr;
int offset = addr & (PAGE_SIZE-1);
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
- phys_addr = follow_phys(vma, addr, write, &prot);
-
- if (!phys_addr)
+ if (follow_phys(vma, addr, write, &prot, &phys_addr))
return -EINVAL;
maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
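
With PAT tracking, vm_insert_pfn() and remap_pfn_range() now follow a reserve/act/unwind shape: register the PFN range with the tracker, attempt the mapping, and drop the reservation again if the mapping fails. A generic sketch of that error-unwinding idiom (all names hypothetical):

static int reserve_range(unsigned long pfn, unsigned long size);  /* track_pfn_vma_new() role */
static int do_map(unsigned long pfn, unsigned long size);         /* insert_pfn() role */
static void release_range(unsigned long pfn, unsigned long size); /* untrack_pfn_vma() role */

static int map_with_tracking(unsigned long pfn, unsigned long size)
{
	int ret;

	if (reserve_range(pfn, size))
		return -1;

	ret = do_map(pfn, size);
	if (ret)
		release_range(pfn, size);	/* unwind the reservation */

	return ret;
}
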
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6837a1014372..b17371185468 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -22,7 +22,6 @@
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/ioport.h>
-#include <linux/cpuset.h>
#include <linux/delay.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
@@ -190,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
pgdat->node_start_pfn;
}
-static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nr_pages = PAGES_PER_SECTION;
@@ -217,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
return 0;
}
-static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
int nr_pages = PAGES_PER_SECTION;
int ret;
@@ -274,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
* call this function after deciding the zone to which to
* add the new pages.
*/
-int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
unsigned long nr_pages)
{
unsigned long i;
@@ -471,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
}
-int add_memory(int nid, u64 start, u64 size)
+/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
+int __ref add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat = NULL;
int new_pgdat = 0;
@@ -498,8 +498,6 @@ int add_memory(int nid, u64 start, u64 size)
/* we online node here. we can't roll back from here. */
node_set_online(nid);
- cpuset_track_online_nodes();
-
if (new_pgdat) {
ret = register_one_node(nid);
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36f42573a335..e9493b1c1117 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
int err;
struct vm_area_struct *first, *vma, *prev;
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-
- err = migrate_prep();
- if (err)
- return ERR_PTR(err);
- }
first = find_vma(mm, start);
if (!first)
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
{
int busy = 0;
- int err = 0;
+ int err;
nodemask_t tmp;
+ err = migrate_prep();
+ if (err)
+ return err;
+
down_read(&mm->mmap_sem);
err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len,
start, start + len, mode, mode_flags,
nmask ? nodes_addr(*nmask)[0] : -1);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+
+ err = migrate_prep();
+ if (err)
+ return err;
+ }
down_write(&mm->mmap_sem);
vma = check_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
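
migrate_prep() ends up in lru_add_drain_all(), which schedules work on every CPU and may block; running it while holding mmap_sem would be an invitation to deadlock, which is why the calls above moved out of check_range() and in front of the semaphore. A small sketch of the resulting ordering rule, using a pthread mutex as a stand-in for mmap_sem:

#include <pthread.h>

static pthread_mutex_t address_space_lock = PTHREAD_MUTEX_INITIALIZER;

static int global_prep(void);	/* may block waiting on every CPU, like
				   migrate_prep()/lru_add_drain_all() */
static int operate(void);

static int do_operation(void)
{
	int err = global_prep();	/* must finish before the lock is taken */

	if (err)
		return err;

	pthread_mutex_lock(&address_space_lock);	/* mmap_sem's role */
	err = operate();
	pthread_mutex_unlock(&address_space_lock);
	return err;
}
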
diff --git a/mm/migrate.c b/mm/migrate.c
index 6602941bfab0..037b0967c1e3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page)
remove_migration_ptes(page, page);
rc = mapping->a_ops->writepage(page, &wbc);
- if (rc < 0)
- /* I/O Error writing */
- return -EIO;
if (rc != AOP_WRITEPAGE_ACTIVATE)
/* unlocked. Relock */
lock_page(page);
- return -EAGAIN;
+ return (rc < 0) ? -EIO : -EAGAIN;
}
/*
@@ -841,12 +838,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
struct page_to_node *pp;
LIST_HEAD(pagelist);
+ migrate_prep();
down_read(&mm->mmap_sem);
/*
* Build a list of pages to migrate
*/
- migrate_prep();
for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
struct vm_area_struct *vma;
struct page *page;
@@ -990,25 +987,18 @@ out:
/*
* Determine the nodes of an array of pages and store it in an array of status.
*/
-static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
- const void __user * __user *pages,
- int __user *status)
+static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user **pages, int *status)
{
unsigned long i;
- int err;
down_read(&mm->mmap_sem);
for (i = 0; i < nr_pages; i++) {
- const void __user *p;
- unsigned long addr;
+ unsigned long addr = (unsigned long)(*pages);
struct vm_area_struct *vma;
struct page *page;
-
- err = -EFAULT;
- if (get_user(p, pages+i))
- goto out;
- addr = (unsigned long) p;
+ int err = -EFAULT;
vma = find_vma(mm, addr);
if (!vma)
@@ -1027,12 +1017,52 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
err = page_to_nid(page);
set_status:
- put_user(err, status+i);
+ *status = err;
+
+ pages++;
+ status++;
+ }
+
+ up_read(&mm->mmap_sem);
+}
+
+/*
+ * Determine the nodes of a user array of pages and store it in
+ * a user array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user * __user *pages,
+ int __user *status)
+{
+#define DO_PAGES_STAT_CHUNK_NR 16
+ const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
+ int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+ unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+ int err;
+
+ for (i = 0; i < nr_pages; i += chunk_nr) {
+ if (chunk_nr + i > nr_pages)
+ chunk_nr = nr_pages - i;
+
+ err = copy_from_user(chunk_pages, &pages[i],
+ chunk_nr * sizeof(*chunk_pages));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
+
+ err = copy_to_user(&status[i], chunk_status,
+ chunk_nr * sizeof(*chunk_status));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
}
err = 0;
out:
- up_read(&mm->mmap_sem);
return err;
}
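
The rewritten do_pages_stat() no longer does one get_user()/put_user() round trip per page; it stages pointers and status values through fixed-size on-stack arrays, 16 entries at a time, shrinking the final chunk. A userspace sketch of the same chunking pattern (chunk size and element type arbitrary):

#include <string.h>

#define CHUNK_NR 16

/* Process src[0..n) through a small staging buffer, shortening the last
 * chunk, the way do_pages_stat() batches copy_from_user()/copy_to_user(). */
static void process_chunked(const int *src, int *dst, unsigned long n)
{
	int stage[CHUNK_NR];
	unsigned long i, chunk = CHUNK_NR;

	for (i = 0; i < n; i += chunk) {
		if (chunk + i > n)
			chunk = n - i;		/* short final chunk */

		memcpy(stage, &src[i], chunk * sizeof(*stage));
		/* ... resolve each stage[] entry in place ... */
		memcpy(&dst[i], stage, chunk * sizeof(*stage));
	}
}
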
diff --git a/mm/mlock.c b/mm/mlock.c
index 008ea70b7afa..1ada366570cb 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page)
putback_lru_page(page);
} else {
/*
- * Page not on the LRU yet. Flush all pagevecs and retry.
+		 * We lost the race; the page has already been moved to the
+		 * evictable list.
*/
- lru_add_drain_all();
- if (!isolate_lru_page(page))
- putback_lru_page(page);
- else if (PageUnevictable(page))
+ if (PageUnevictable(page))
count_vm_event(UNEVICTABLE_PGSTRANDED);
-
}
}
@@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long addr = start;
struct page *pages[16]; /* 16 gives a reasonable batch */
int nr_pages = (end - start) / PAGE_SIZE;
- int ret;
+ int ret = 0;
int gup_flags = 0;
VM_BUG_ON(start & ~PAGE_MASK);
@@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
if (vma->vm_flags & VM_WRITE)
gup_flags |= GUP_FLAGS_WRITE;
- lru_add_drain_all(); /* push cached pages to LRU */
-
while (nr_pages > 0) {
int i;
@@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
ret = 0;
}
- lru_add_drain_all(); /* to update stats */
-
return ret; /* count entire vma as locked_vm */
}
@@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
if (!can_do_mlock())
return -EPERM;
+ lru_add_drain_all(); /* flush pagevec */
+
down_write(&current->mm->mmap_sem);
len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
start &= PAGE_MASK;
@@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags)
if (!can_do_mlock())
goto out;
+ lru_add_drain_all(); /* flush pagevec */
+
down_write(&current->mm->mmap_sem);
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
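
The mlock changes are the same medicine applied twice: lru_add_drain_all() also cannot run under mmap_sem, and calling it once per __mlock_vma_pages_range() invocation was wasteful anyway, so the drain is hoisted to the two syscall entry points. A sketch of the hoisting pattern (names hypothetical):

static void drain_all(void);	/* expensive, blocking; lru_add_drain_all() analogue */
static long lock_one_range(unsigned long start, unsigned long len);

static long lock_many(const unsigned long *starts, const unsigned long *lens, int n)
{
	long err = 0;
	int i;

	drain_all();	/* once, at the entry point, before any lock is held */

	for (i = 0; i < n && !err; i++)
		err = lock_one_range(starts[i], lens[i]);

	return err;
}
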
diff --git a/mm/mmap.c b/mm/mmap.c
index de14ac21e5b5..d4855a682ab6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1704,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (expand_stack(prev, addr))
+ if (!prev || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED) {
if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64e5b4bcd964..a0a01902f551 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
* badness - calculate a numeric value for how bad this task has been
* @p: task struct of which task we should calculate
* @uptime: current uptime in seconds
- * @mem: target memory controller
*
* The formula used is relatively simple and documented inline in the
* function. The main rationale is that we want to select a good task
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
continue;
if (mem && !task_in_mem_cgroup(p, mem))
continue;
+ if (!thread_group_leader(p))
+ continue;
task_lock(p);
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d0a240fbb8bf..d8ac01474563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
+
+ set_compound_page_dtor(page, free_compound_page);
+ set_compound_order(page, order);
+ __SetPageHead(page);
+ for (i = 1; i < nr_pages; i++) {
+ struct page *p = page + i;
+
+ __SetPageTail(p);
+ p->first_page = page;
+ }
+}
+
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+ int i;
+ int nr_pages = 1 << order;
struct page *p = page + 1;
set_compound_page_dtor(page, free_compound_page);
set_compound_order(page, order);
__SetPageHead(page);
- for (i = 1; i < nr_pages; i++, p++) {
- if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
- p = pfn_to_page(page_to_pfn(page) + i);
+ for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
__SetPageTail(p);
p->first_page = page;
}
}
+#endif
static void destroy_compound_page(struct page *page, unsigned long order)
{
int i;
int nr_pages = 1 << order;
- struct page *p = page + 1;
if (unlikely(compound_order(page) != order))
bad_page(page);
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
if (unlikely(!PageHead(page)))
bad_page(page);
__ClearPageHead(page);
- for (i = 1; i < nr_pages; i++, p++) {
- if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
- p = pfn_to_page(page_to_pfn(page) + i);
+ for (i = 1; i < nr_pages; i++) {
+ struct page *p = page + i;
if (unlikely(!PageTail(p) |
(p->first_page != page)))
@@ -1547,6 +1561,10 @@ nofail_alloc:
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
+ /*
+ * The task's cpuset might have expanded its set of allowable nodes
+ */
+ cpuset_update_task_memory_state();
p->flags |= PF_MEMALLOC;
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f59d797dc5a9..ab27ff750519 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -21,7 +21,7 @@ static unsigned long total_usage;
#if !defined(CONFIG_SPARSEMEM)
-void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
pgdat->node_page_cgroup = NULL;
}
@@ -49,6 +49,9 @@ static int __init alloc_node_page_cgroup(int nid)
start_pfn = NODE_DATA(nid)->node_start_pfn;
nr_pages = NODE_DATA(nid)->node_spanned_pages;
+ if (!nr_pages)
+ return 0;
+
table_size = sizeof(struct page_cgroup) * nr_pages;
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
@@ -97,7 +100,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
return section->page_cgroup + pfn;
}
-int __meminit init_section_page_cgroup(unsigned long pfn)
+/* __alloc_bootmem...() is protected by !slab_available() */
+int __init_refok init_section_page_cgroup(unsigned long pfn)
{
struct mem_section *section;
struct page_cgroup *base, *pc;
@@ -106,19 +110,29 @@ int __meminit init_section_page_cgroup(unsigned long pfn)
section = __pfn_to_section(pfn);
- if (section->page_cgroup)
- return 0;
-
- nid = page_to_nid(pfn_to_page(pfn));
-
- table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
- if (slab_is_available()) {
- base = kmalloc_node(table_size, GFP_KERNEL, nid);
- if (!base)
- base = vmalloc_node(table_size, nid);
- } else {
- base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size,
+ if (!section->page_cgroup) {
+ nid = page_to_nid(pfn_to_page(pfn));
+ table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+ if (slab_is_available()) {
+ base = kmalloc_node(table_size, GFP_KERNEL, nid);
+ if (!base)
+ base = vmalloc_node(table_size, nid);
+ } else {
+ base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+ table_size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+ }
+ } else {
+ /*
+	 * We don't have to allocate page_cgroup again, but the address
+	 * of the memmap may have changed, so we have to initialize it
+	 * again.
+ */
+ base = section->page_cgroup + pfn;
+ table_size = 0;
+		/* check whether the address of the memmap has changed */
+ if (base->page == pfn_to_page(pfn))
+ return 0;
}
if (!base) {
@@ -158,14 +172,14 @@ void __free_page_cgroup(unsigned long pfn)
}
}
-int online_page_cgroup(unsigned long start_pfn,
+int __meminit online_page_cgroup(unsigned long start_pfn,
unsigned long nr_pages,
int nid)
{
unsigned long start, end, pfn;
int fail = 0;
- start = start_pfn & (PAGES_PER_SECTION - 1);
+ start = start_pfn & ~(PAGES_PER_SECTION - 1);
end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
@@ -183,12 +197,12 @@ int online_page_cgroup(unsigned long start_pfn,
return -ENOMEM;
}
-int offline_page_cgroup(unsigned long start_pfn,
+int __meminit offline_page_cgroup(unsigned long start_pfn,
unsigned long nr_pages, int nid)
{
unsigned long start, end, pfn;
- start = start_pfn & (PAGES_PER_SECTION - 1);
+ start = start_pfn & ~(PAGES_PER_SECTION - 1);
end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
@@ -197,7 +211,7 @@ int offline_page_cgroup(unsigned long start_pfn,
}
-static int page_cgroup_callback(struct notifier_block *self,
+static int __meminit page_cgroup_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mn = arg;
@@ -207,18 +221,23 @@ static int page_cgroup_callback(struct notifier_block *self,
ret = online_page_cgroup(mn->start_pfn,
mn->nr_pages, mn->status_change_nid);
break;
- case MEM_CANCEL_ONLINE:
case MEM_OFFLINE:
offline_page_cgroup(mn->start_pfn,
mn->nr_pages, mn->status_change_nid);
break;
+ case MEM_CANCEL_ONLINE:
case MEM_GOING_OFFLINE:
break;
case MEM_ONLINE:
case MEM_CANCEL_OFFLINE:
break;
}
- ret = notifier_from_errno(ret);
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
return ret;
}
@@ -248,7 +267,7 @@ void __init page_cgroup_init(void)
" want\n");
}
-void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
return;
}
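
The notifier fix here (and the identical one in mm/slub.c below) works around the 2.6.28-era notifier_from_errno(), which set NOTIFY_STOP_MASK even for a zero errno and would therefore stop the notifier chain on success; success has to be translated to NOTIFY_OK by hand. A standalone sketch of the convention, with the constants copied from <linux/notifier.h> but the old conversion simplified:

#define NOTIFY_OK	 0x0001
#define NOTIFY_STOP_MASK 0x8000

/* Simplified model of the old helper: the stop bit is set no matter
 * what, so err == 0 must never be passed through it. */
static int notifier_from_errno_old(int err)
{
	return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
}

/* What the callbacks above now do with an errno-style result. */
static int callback_result(int err)
{
	return err ? notifier_from_errno_old(err) : NOTIFY_OK;
}
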
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b70a7fec1ff6..5e0ffd967452 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
break;
}
- if (pfn < end_pfn)
+ page = __first_valid_page(start_pfn, end_pfn - start_pfn);
+ if ((pfn < end_pfn) || !page)
return -EBUSY;
/* Check all pages are free or Marked as ISOLATED */
- zone = page_zone(pfn_to_page(pfn));
+ zone = page_zone(page);
spin_lock_irqsave(&zone->lock, flags);
ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/slob.c b/mm/slob.c
index cb675d126791..bf7e8fc3aed8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
struct kmem_cache *c;
c = slob_alloc(sizeof(struct kmem_cache),
- flags, ARCH_KMALLOC_MINALIGN, -1);
+ GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);
if (c) {
c->name = name;
diff --git a/mm/slub.c b/mm/slub.c
index 7ad489af9561..a2cd47d89e0a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self,
case MEM_CANCEL_OFFLINE:
break;
}
-
- ret = notifier_from_errno(ret);
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
return ret;
}
@@ -3595,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
for (i = 0; i < t.count; i++) {
struct location *l = &t.loc[i];
- if (len > PAGE_SIZE - 100)
+ if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
break;
len += sprintf(buf + len, "%7ld ", l->count);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a91b5f8fcaf6..a13ea6401ae7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
unsigned long pfn = pte_pfn(*pte);
int actual_node = early_pfn_to_nid(pfn);
- if (actual_node != node)
+ if (node_distance(actual_node, node) > LOCAL_DISTANCE)
printk(KERN_WARNING "[%lx-%lx] potential offnode "
"page_structs\n", start, end - 1);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index 39db301b920d..083f5b63e7a8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
* set. If this is <=0, then that means that the passed-in
* map was not consumed and must be freed.
*/
-int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
+int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
diff --git a/mm/swap.c b/mm/swap.c
index 2152e48a7b8f..b135ec90cdeb 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -299,7 +299,6 @@ void lru_add_drain(void)
put_cpu();
}
-#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
lru_add_drain();
@@ -313,18 +312,6 @@ int lru_add_drain_all(void)
return schedule_on_each_cpu(lru_add_drain_per_cpu);
}
-#else
-
-/*
- * Returns 0 for success
- */
-int lru_add_drain_all(void)
-{
- lru_add_drain();
- return 0;
-}
-#endif
-
/*
* Batched page_cache_release(). Decrement the reference count on all the
* passed pages. If it fell to zero then remove the page from the LRU and
@@ -445,6 +432,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
struct zone *pagezone = page_zone(page);
+ int file;
if (pagezone != zone) {
if (zone)
@@ -456,8 +444,12 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
VM_BUG_ON(PageUnevictable(page));
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- if (is_active_lru(lru))
+ file = is_file_lru(lru);
+ zone->recent_scanned[file]++;
+ if (is_active_lru(lru)) {
SetPageActive(page);
+ zone->recent_rotated[file]++;
+ }
add_page_to_lru_list(zone, page, lru);
}
if (zone)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90cb67a5417c..54a9f87e5162 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void)
__initcall(procswaps_init);
#endif /* CONFIG_PROC_FS */
+#ifdef MAX_SWAPFILES_CHECK
+static int __init max_swapfiles_check(void)
+{
+ MAX_SWAPFILES_CHECK();
+ return 0;
+}
+late_initcall(max_swapfiles_check);
+#endif
+
/*
* Written 01/25/92 by Simmule Turner, heavily changed by Linus.
*
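
MAX_SWAPFILES_CHECK() is a compile-time consistency check; putting it inside a late_initcall() merely gives the compiler a function body in which to expand it, and the hook costs nothing at runtime. A standalone rendition of the underlying negative-array-size trick (assuming the check is BUILD_BUG_ON-style, as the name suggests):

/* Build fails if cond is true: char[-1] is a constraint violation. */
#define BUILD_BUG_ON_SKETCH(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

static int check_limits(void)
{
	/* e.g. refuse to build if a long cannot hold 32 bits */
	BUILD_BUG_ON_SKETCH(sizeof(long) < 4);
	return 0;
}
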
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f1cc03bbf6ac..1ddb77ba3995 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -77,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
- flush_cache_vunmap(addr, end);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
@@ -178,7 +177,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
static inline int is_vmalloc_or_module_addr(const void *x)
{
/*
- * x86-64 and sparc64 put modules in a special place,
+ * ARM, x86-64 and sparc64 put modules in a special place,
* and fall back on vmalloc() if that fails. Others
* just put it in the vmalloc space.
*/
@@ -324,14 +323,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
BUG_ON(size & ~PAGE_MASK);
- addr = ALIGN(vstart, align);
-
va = kmalloc_node(sizeof(struct vmap_area),
gfp_mask & GFP_RECLAIM_MASK, node);
if (unlikely(!va))
return ERR_PTR(-ENOMEM);
retry:
+ addr = ALIGN(vstart, align);
+
spin_lock(&vmap_area_lock);
/* XXX: could have a last_hole cache */
n = vmap_area_root.rb_node;
@@ -362,7 +361,7 @@ retry:
goto found;
}
- while (addr + size >= first->va_start && addr + size <= vend) {
+ while (addr + size > first->va_start && addr + size <= vend) {
addr = ALIGN(first->va_end + PAGE_SIZE, align);
n = rb_next(&first->rb_node);
@@ -522,24 +521,45 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
}
/*
+ * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
+ * is already purging.
+ */
+static void try_purge_vmap_area_lazy(void)
+{
+ unsigned long start = ULONG_MAX, end = 0;
+
+ __purge_vmap_area_lazy(&start, &end, 0, 0);
+}
+
+/*
* Kick off a purge of the outstanding lazy areas.
*/
static void purge_vmap_area_lazy(void)
{
unsigned long start = ULONG_MAX, end = 0;
- __purge_vmap_area_lazy(&start, &end, 0, 0);
+ __purge_vmap_area_lazy(&start, &end, 1, 0);
}
/*
- * Free and unmap a vmap area
+ * Free and unmap a vmap area; the caller must ensure that
+ * flush_cache_vunmap() has already been called for the range.
*/
-static void free_unmap_vmap_area(struct vmap_area *va)
+static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
va->flags |= VM_LAZY_FREE;
atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
- purge_vmap_area_lazy();
+ try_purge_vmap_area_lazy();
+}
+
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+ flush_cache_vunmap(va->va_start, va->va_end);
+ free_unmap_vmap_area_noflush(va);
}
static struct vmap_area *find_vmap_area(unsigned long addr)
@@ -592,6 +612,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr)
#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
+static bool vmap_initialized __read_mostly = false;
+
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
@@ -721,7 +743,7 @@ static void free_vmap_block(struct vmap_block *vb)
spin_unlock(&vmap_block_tree_lock);
BUG_ON(tmp != vb);
- free_unmap_vmap_area(vb->va);
+ free_unmap_vmap_area_noflush(vb->va);
call_rcu(&vb->rcu_head, rcu_free_vb);
}
@@ -783,6 +805,9 @@ static void vb_free(const void *addr, unsigned long size)
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+
+ flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
+
order = get_order(size);
offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
@@ -828,6 +853,9 @@ void vm_unmap_aliases(void)
int cpu;
int flush = 0;
+ if (unlikely(!vmap_initialized))
+ return;
+
for_each_possible_cpu(cpu) {
struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
struct vmap_block *vb;
@@ -942,6 +970,8 @@ void __init vmalloc_init(void)
INIT_LIST_HEAD(&vbq->dirty);
vbq->nr_dirty = 0;
}
+
+ vmap_initialized = true;
}
void unmap_kernel_range(unsigned long addr, unsigned long size)
@@ -1687,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p)
v->addr, v->addr + v->size, v->size);
if (v->caller) {
- char buff[2 * KSYM_NAME_LEN];
+ char buff[KSYM_SYMBOL_LEN];
seq_putc(m, ' ');
sprint_symbol(buff, (unsigned long)v->caller);
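
The vmalloc rework splits teardown into a _noflush variant plus a thin wrapper that performs flush_cache_vunmap() itself, so that batch paths like vb_free() can flush one covering range and then free entries without redundant per-area flushes. A sketch of that split (types and names illustrative):

struct area { unsigned long start, end; };

static void cache_flush(unsigned long start, unsigned long end);
static void teardown_noflush(struct area *a);	/* no cache maintenance */

/* Single-area path: flush exactly this range, then tear down. Batch
 * callers flush one covering range themselves and call the noflush
 * variant per area. */
static void teardown(struct area *a)
{
	cache_flush(a->start, a->end);
	teardown_noflush(a);
}
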
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b5860294bb6..62e7f62fb559 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* Try to allocate it some swap space here.
*/
if (PageAnon(page) && !PageSwapCache(page)) {
+ if (!(sc->gfp_mask & __GFP_IO))
+ goto keep_locked;
switch (try_to_munlock(page)) {
case SWAP_FAIL: /* shouldn't happen */
case SWAP_AGAIN:
@@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
+ may_enter_fs = 1;
}
#endif /* CONFIG_SWAP */
@@ -1245,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
list_add(&page->lru, &l_inactive);
}
+ spin_lock_irq(&zone->lru_lock);
/*
* Count referenced pages from currently used mappings as
* rotated, even though they are moved to the inactive list.
@@ -1260,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
pgmoved = 0;
lru = LRU_BASE + file * LRU_FILE;
- spin_lock_irq(&zone->lru_lock);
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
file_prio = 200 - sc->swappiness;
/*
-	 *                  anon       recent_rotated[0]
-	 * %anon = 100 * ----------- / ----------------- * IO cost
-	 *               anon + file      rotate_sum
+ * The amount of pressure on anon vs file pages is inversely
+ * proportional to the fraction of recently scanned pages on
+ * each list that were recently referenced and in active use.
*/
ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
ap /= zone->recent_rotated[0] + 1;
@@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
return 1;
}
-static void show_page_path(struct page *page)
-{
- char buf[256];
- if (page_is_file_cache(page)) {
- struct address_space *mapping = page->mapping;
- struct dentry *dentry;
- pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
- spin_lock(&mapping->i_mmap_lock);
- dentry = d_find_alias(mapping->host);
- printk(KERN_INFO "rescued: %s %lu\n",
- dentry_path(dentry, buf, 256), pgoff);
- spin_unlock(&mapping->i_mmap_lock);
- } else {
-#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
- struct anon_vma *anon_vma;
- struct vm_area_struct *vma;
-
- anon_vma = page_lock_anon_vma(page);
- if (!anon_vma)
- return;
-
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
- printk(KERN_INFO "rescued: anon %s\n",
- vma->vm_mm->owner->comm);
- break;
- }
- page_unlock_anon_vma(anon_vma);
-#endif
- }
-}
-
-
/**
* check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
* @page: page to check evictability and move to appropriate lru list
@@ -2421,8 +2391,6 @@ retry:
if (page_evictable(page, NULL)) {
enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
- show_page_path(page);
-
__dec_zone_state(zone, NR_UNEVICTABLE);
list_move(&page->lru, &zone->lru[l].list);
__inc_zone_state(zone, NR_INACTIVE_ANON + l);
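
For the reworked comment in get_scan_ratio(): each list's pressure scales with its static priority (swappiness) and with how few of its recently scanned pages were rotated back as referenced. A worked example with swappiness 60 and hypothetical counters:

#include <stdio.h>

int main(void)
{
	unsigned long anon_prio = 60, file_prio = 200 - 60;
	unsigned long scanned[2] = { 1000, 1000 };	/* [0] anon, [1] file */
	unsigned long rotated[2] = {  500,  100 };	/* hypothetical */

	/* Same arithmetic as the hunk above: the heavily rotated list
	 * (anon here) ends up with proportionally less pressure. */
	unsigned long ap = (anon_prio + 1) * (scanned[0] + 1) / (rotated[0] + 1);
	unsigned long fp = (file_prio + 1) * (scanned[1] + 1) / (rotated[1] + 1);

	/* prints "anon share of scan pressure: 7%" with these numbers */
	printf("anon share of scan pressure: %lu%%\n", 100 * ap / (ap + fp));
	return 0;
}
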