summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2007-02-13 09:43:25 +0300
committerTrond Myklebust <Trond.Myklebust@netapp.com>2007-02-13 09:43:25 +0300
commitd9bc125caf592b7d081021f32ce5b717efdf70c8 (patch)
tree263b7066ba22ddce21db610c0300f6eaac6f2064 /mm
parent43d78ef2ba5bec26d0315859e8324bfc0be23766 (diff)
parentec2f9d1331f658433411c58077871e1eef4ee1b4 (diff)
downloadlinux-d9bc125caf592b7d081021f32ce5b717efdf70c8.tar.xz
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Conflicts: net/sunrpc/auth_gss/gss_krb5_crypto.c net/sunrpc/auth_gss/gss_spkm3_token.c net/sunrpc/clnt.c Merge with mainline and fix conflicts.
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig6
-rw-r--r--mm/filemap.c24
-rw-r--r--mm/highmem.c3
-rw-r--r--mm/hugetlb.c2
-rw-r--r--mm/memory.c60
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/mempool.c6
-rw-r--r--mm/mincore.c102
-rw-r--r--mm/mmap.c72
-rw-r--r--mm/page-writeback.c17
-rw-r--r--mm/page_alloc.c102
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/shmem.c22
-rw-r--r--mm/slab.c246
-rw-r--r--mm/truncate.c11
-rw-r--r--mm/vmalloc.c2
-rw-r--r--mm/vmscan.c51
-rw-r--r--mm/vmstat.c70
18 files changed, 464 insertions, 342 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index db7c55de92cd..7942b333e46c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -157,3 +157,9 @@ config RESOURCES_64BIT
default 64BIT
help
This option allows memory and IO resources to be 64 bit.
+
+config ZONE_DMA_FLAG
+ int
+ default "0" if !ZONE_DMA
+ default "1"
+
diff --git a/mm/filemap.c b/mm/filemap.c
index 8332c77b1bd1..00414849a867 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range);
* @pos: beginning offset in pages to write
* @count: number of bytes to write
*
- * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
+ * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
* as it forces O_SYNC writers to different parts of the same file
* to be serialised right until io completion.
*/
@@ -606,26 +606,6 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset)
EXPORT_SYMBOL(find_get_page);
/**
- * find_trylock_page - find and lock a page
- * @mapping: the address_space to search
- * @offset: the page index
- *
- * Same as find_get_page(), but trylock it instead of incrementing the count.
- */
-struct page *find_trylock_page(struct address_space *mapping, unsigned long offset)
-{
- struct page *page;
-
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
- if (page && TestSetPageLocked(page))
- page = NULL;
- read_unlock_irq(&mapping->tree_lock);
- return page;
-}
-EXPORT_SYMBOL(find_trylock_page);
-
-/**
* find_lock_page - locate, pin and lock a pagecache page
* @mapping: the address_space to search
* @offset: the page index
@@ -804,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
* @mapping: target address_space
* @index: the page index
*
- * Same as grab_cache_page, but do not wait if the page is unavailable.
+ * Same as grab_cache_page(), but do not wait if the page is unavailable.
* This is intended for speculative data generators, where the data can
* be regenerated if the page couldn't be grabbed. This routine should
* be safe to call while holding the lock for another page.
diff --git a/mm/highmem.c b/mm/highmem.c
index 0206e7e5018c..51e1c1995fec 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void)
unsigned int pages = 0;
for_each_online_pgdat(pgdat)
- pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+ pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+ NR_FREE_PAGES);
return pages;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cb362f761f17..36db012b38dd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -389,6 +389,8 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
continue;
page = pte_page(pte);
+ if (pte_dirty(pte))
+ set_page_dirty(page);
list_add(&page->lru, &page_list);
}
spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index ef09f0acb1d8..e7066e71dfa3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -678,7 +678,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (pte_dirty(ptent))
set_page_dirty(page);
if (pte_young(ptent))
- mark_page_accessed(page);
+ SetPageReferenced(page);
file_rss--;
}
page_remove_rmap(page, vma);
@@ -1277,6 +1277,51 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *
}
EXPORT_SYMBOL(vm_insert_page);
+/**
+ * vm_insert_pfn - insert single pfn into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ *
+ * Similar to vm_inert_page, this allows drivers to insert individual pages
+ * they've allocated into a user vma. Same comments apply.
+ *
+ * This function should only be called from a vm_ops->fault handler, and
+ * in that case the handler should return NULL.
+ */
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ int retval;
+ pte_t *pte, entry;
+ spinlock_t *ptl;
+
+ BUG_ON(!(vma->vm_flags & VM_PFNMAP));
+ BUG_ON(is_cow_mapping(vma->vm_flags));
+
+ retval = -ENOMEM;
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto out;
+ retval = -EBUSY;
+ if (!pte_none(*pte))
+ goto out_unlock;
+
+ /* Ok, finally just insert the thing.. */
+ entry = pfn_pte(pfn, vma->vm_page_prot);
+ set_pte_at(mm, addr, pte, entry);
+ update_mmu_cache(vma, addr, entry);
+
+ retval = 0;
+out_unlock:
+ pte_unmap_unlock(pte, ptl);
+
+out:
+ return retval;
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
/*
* maps a range of physical memory into the requested pages. the old
* mappings are removed. any references to nonexistent pages results
@@ -1531,8 +1576,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
goto unwritable_page;
- page_cache_release(old_page);
-
/*
* Since we dropped the lock we need to revalidate
* the PTE as someone else may have changed it. If
@@ -1541,6 +1584,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
page_table = pte_offset_map_lock(mm, pmd, address,
&ptl);
+ page_cache_release(old_page);
if (!pte_same(*page_table, orig_pte))
goto unlock;
}
@@ -1776,9 +1820,7 @@ restart:
}
/**
- * unmap_mapping_range - unmap the portion of all mmaps
- * in the specified address_space corresponding to the specified
- * page range in the underlying file.
+ * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
@@ -2313,10 +2355,12 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
BUG_ON(is_cow_mapping(vma->vm_flags));
pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
- if (pfn == NOPFN_OOM)
+ if (unlikely(pfn == NOPFN_OOM))
return VM_FAULT_OOM;
- if (pfn == NOPFN_SIGBUS)
+ else if (unlikely(pfn == NOPFN_SIGBUS))
return VM_FAULT_SIGBUS;
+ else if (unlikely(pfn == NOPFN_REFAULT))
+ return VM_FAULT_MINOR;
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c2aec0e1090d..259a706bd83e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache;
/* Highest zone. An specific allocation for a zone below that is not
policied. */
-enum zone_type policy_zone = ZONE_DMA;
+enum zone_type policy_zone = 0;
struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
diff --git a/mm/mempool.c b/mm/mempool.c
index ccd8cb8cd41f..cc1ca86dfc24 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool)
* @pool_data: optional private data available to the user-defined functions.
*
* this function creates and allocates a guaranteed size, preallocated
- * memory pool. The pool can be used from the mempool_alloc and mempool_free
+ * memory pool. The pool can be used from the mempool_alloc() and mempool_free()
* functions. This function might sleep. Both the alloc_fn() and the free_fn()
- * functions might sleep - as long as the mempool_alloc function is not called
+ * functions might sleep - as long as the mempool_alloc() function is not called
* from IRQ contexts.
*/
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy);
* mempool_create().
* @gfp_mask: the usual allocation bitmask.
*
- * this function only sleeps if the alloc_fn function sleeps or
+ * this function only sleeps if the alloc_fn() function sleeps or
* returns NULL. Note that due to preallocation, this function
* *never* fails when called from process contexts. (it might
* fail if called from an IRQ context.)
diff --git a/mm/mincore.c b/mm/mincore.c
index 8aca6f7167bb..95c5f49f0a1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -12,6 +12,8 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -22,14 +24,22 @@
* and is up to date; i.e. that no page-in operation would be required
* at this time if an application were to map and access this page.
*/
-static unsigned char mincore_page(struct vm_area_struct * vma,
- unsigned long pgoff)
+static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
{
unsigned char present = 0;
- struct address_space * as = vma->vm_file->f_mapping;
- struct page * page;
+ struct page *page;
- page = find_get_page(as, pgoff);
+ /*
+ * When tmpfs swaps out a page from a file, any process mapping that
+ * file will not get a swp_entry_t in its pte, but rather it is like
+ * any other file mapping (ie. marked !present and faulted in with
+ * tmpfs's .nopage). So swapped out tmpfs mappings are tested here.
+ *
+ * However when tmpfs moves the page from pagecache and into swapcache,
+ * it is still in core, but the find_get_page below won't find it.
+ * No big deal, but make a note of it.
+ */
+ page = find_get_page(mapping, pgoff);
if (page) {
present = PageUptodate(page);
page_cache_release(page);
@@ -45,7 +55,14 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
*/
static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
{
- unsigned long i, nr, pgoff;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep;
+ spinlock_t *ptl;
+ unsigned long nr;
+ int i;
+ pgoff_t pgoff;
struct vm_area_struct *vma = find_vma(current->mm, addr);
/*
@@ -56,31 +73,64 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
return -ENOMEM;
/*
- * Ok, got it. But check whether it's a segment we support
- * mincore() on. Right now, we don't do any anonymous mappings.
- *
- * FIXME: This is just stupid. And returning ENOMEM is
- * stupid too. We should just look at the page tables. But
- * this is what we've traditionally done, so we'll just
- * continue doing it.
+ * Calculate how many pages there are left in the last level of the
+ * PTE array for our address.
*/
- if (!vma->vm_file)
- return -ENOMEM;
-
- /*
- * Calculate how many pages there are left in the vma, and
- * what the pgoff is for our address.
- */
- nr = (vma->vm_end - addr) >> PAGE_SHIFT;
+ nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
if (nr > pages)
nr = pages;
- pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
- pgoff += vma->vm_pgoff;
+ pgd = pgd_offset(vma->vm_mm, addr);
+ if (pgd_none_or_clear_bad(pgd))
+ goto none_mapped;
+ pud = pud_offset(pgd, addr);
+ if (pud_none_or_clear_bad(pud))
+ goto none_mapped;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none_or_clear_bad(pmd))
+ goto none_mapped;
+
+ ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
+ unsigned char present;
+ pte_t pte = *ptep;
+
+ if (pte_present(pte)) {
+ present = 1;
+
+ } else if (pte_none(pte)) {
+ if (vma->vm_file) {
+ pgoff = linear_page_index(vma, addr);
+ present = mincore_page(vma->vm_file->f_mapping,
+ pgoff);
+ } else
+ present = 0;
+
+ } else if (pte_file(pte)) {
+ pgoff = pte_to_pgoff(pte);
+ present = mincore_page(vma->vm_file->f_mapping, pgoff);
+
+ } else { /* pte is a swap entry */
+ swp_entry_t entry = pte_to_swp_entry(pte);
+ if (is_migration_entry(entry)) {
+ /* migration entries are always uptodate */
+ present = 1;
+ } else {
+ pgoff = entry.val;
+ present = mincore_page(&swapper_space, pgoff);
+ }
+ }
+ }
+ pte_unmap_unlock(ptep-1, ptl);
+
+ return nr;
- /* And then we just fill the sucker in.. */
- for (i = 0 ; i < nr; i++, pgoff++)
- vec[i] = mincore_page(vma, pgoff);
+none_mapped:
+ if (vma->vm_file) {
+ pgoff = linear_page_index(vma, addr);
+ for (i = 0; i < nr; i++, pgoff++)
+ vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+ }
return nr;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index cc3a20819457..eb509ae76553 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2101,3 +2101,75 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
return 0;
return 1;
}
+
+
+static struct page *special_mapping_nopage(struct vm_area_struct *vma,
+ unsigned long address, int *type)
+{
+ struct page **pages;
+
+ BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+
+ address -= vma->vm_start;
+ for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
+ address -= PAGE_SIZE;
+
+ if (*pages) {
+ struct page *page = *pages;
+ get_page(page);
+ return page;
+ }
+
+ return NOPAGE_SIGBUS;
+}
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .nopage = special_mapping_nopage,
+};
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, struct page **pages)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ if (unlikely(vma == NULL))
+ return -ENOMEM;
+
+ vma->vm_mm = mm;
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+
+ vma->vm_flags = vm_flags | mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+
+ vma->vm_ops = &special_mapping_vmops;
+ vma->vm_private_data = pages;
+
+ if (unlikely(insert_vm_struct(mm, vma))) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+
+ mm->total_vm += len >> PAGE_SHIFT;
+
+ return 0;
+}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index be0efbde4994..f7e088f5a309 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -515,7 +515,7 @@ static int __cpuinit
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
writeback_set_ratelimit();
- return 0;
+ return NOTIFY_DONE;
}
static struct notifier_block __cpuinitdata ratelimit_nb = {
@@ -549,9 +549,7 @@ void __init page_writeback_init(void)
}
/**
- * generic_writepages - walk the list of dirty pages of the given
- * address space and writepage() all of them.
- *
+ * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
*
@@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
/**
* write_one_page - write out a single page and optionally wait on I/O
- *
* @page: the page to write
* @wait: if true, wait on writeout
*
@@ -737,6 +734,16 @@ int write_one_page(struct page *page, int wait)
EXPORT_SYMBOL(write_one_page);
/*
+ * For address_spaces which do not use buffers nor write back.
+ */
+int __set_page_dirty_no_writeback(struct page *page)
+{
+ if (!PageDirty(page))
+ SetPageDirty(page);
+ return 0;
+}
+
+/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
* its radix tree.
*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2c606cc922a5..d461b23a27a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -73,7 +73,9 @@ static void __free_pages_ok(struct page *page, unsigned int order);
* don't need any ZONE_NORMAL reservation
*/
int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+#ifdef CONFIG_ZONE_DMA
256,
+#endif
#ifdef CONFIG_ZONE_DMA32
256,
#endif
@@ -85,7 +87,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
EXPORT_SYMBOL(totalram_pages);
static char * const zone_names[MAX_NR_ZONES] = {
+#ifdef CONFIG_ZONE_DMA
"DMA",
+#endif
#ifdef CONFIG_ZONE_DMA32
"DMA32",
#endif
@@ -395,7 +399,7 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON(page_idx & (order_size - 1));
VM_BUG_ON(bad_range(zone, page));
- zone->free_pages += order_size;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
while (order < MAX_ORDER-1) {
unsigned long combined_idx;
struct free_area *area;
@@ -631,7 +635,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
- zone->free_pages -= 1UL << order;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
expand(zone, page, order, current_order, area);
return page;
}
@@ -989,7 +993,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
/* free_pages my go negative - that's OK */
- long min = mark, free_pages = z->free_pages - (1 << order) + 1;
+ long min = mark;
+ long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
int o;
if (alloc_flags & ALLOC_HIGH)
@@ -1439,35 +1444,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
-/*
- * Total amount of free (allocatable) RAM:
- */
-unsigned int nr_free_pages(void)
-{
- unsigned int sum = 0;
- struct zone *zone;
-
- for_each_zone(zone)
- sum += zone->free_pages;
-
- return sum;
-}
-
-EXPORT_SYMBOL(nr_free_pages);
-
-#ifdef CONFIG_NUMA
-unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
-{
- unsigned int sum = 0;
- enum zone_type i;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- sum += pgdat->node_zones[i].free_pages;
-
- return sum;
-}
-#endif
-
static unsigned int nr_free_zone_pages(int offset)
{
/* Just pick one node, since fallback list is circular */
@@ -1514,7 +1490,7 @@ void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages;
val->sharedram = 0;
- val->freeram = nr_free_pages();
+ val->freeram = global_page_state(NR_FREE_PAGES);
val->bufferram = nr_blockdev_pages();
val->totalhigh = totalhigh_pages;
val->freehigh = nr_free_highpages();
@@ -1529,10 +1505,11 @@ void si_meminfo_node(struct sysinfo *val, int nid)
pg_data_t *pgdat = NODE_DATA(nid);
val->totalram = pgdat->node_present_pages;
- val->freeram = nr_free_pages_pgdat(pgdat);
+ val->freeram = node_page_state(nid, NR_FREE_PAGES);
#ifdef CONFIG_HIGHMEM
val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
- val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+ val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+ NR_FREE_PAGES);
#else
val->totalhigh = 0;
val->freehigh = 0;
@@ -1551,9 +1528,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
void show_free_areas(void)
{
int cpu;
- unsigned long active;
- unsigned long inactive;
- unsigned long free;
struct zone *zone;
for_each_zone(zone) {
@@ -1577,20 +1551,19 @@ void show_free_areas(void)
}
}
- get_zone_counts(&active, &inactive, &free);
-
- printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu "
- "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
- active,
- inactive,
+ printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+ " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n",
+ global_page_state(NR_ACTIVE),
+ global_page_state(NR_INACTIVE),
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
- nr_free_pages(),
+ global_page_state(NR_FREE_PAGES),
global_page_state(NR_SLAB_RECLAIMABLE) +
global_page_state(NR_SLAB_UNRECLAIMABLE),
global_page_state(NR_FILE_MAPPED),
- global_page_state(NR_PAGETABLE));
+ global_page_state(NR_PAGETABLE),
+ global_page_state(NR_BOUNCE));
for_each_zone(zone) {
int i;
@@ -1611,12 +1584,12 @@ void show_free_areas(void)
" all_unreclaimable? %s"
"\n",
zone->name,
- K(zone->free_pages),
+ K(zone_page_state(zone, NR_FREE_PAGES)),
K(zone->pages_min),
K(zone->pages_low),
K(zone->pages_high),
- K(zone->nr_active),
- K(zone->nr_inactive),
+ K(zone_page_state(zone, NR_ACTIVE)),
+ K(zone_page_state(zone, NR_INACTIVE)),
K(zone->present_pages),
zone->pages_scanned,
(zone->all_unreclaimable ? "yes" : "no")
@@ -2650,11 +2623,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
" %s zone: %lu pages exceeds realsize %lu\n",
zone_names[j], memmap_pages, realsize);
- /* Account for reserved DMA pages */
- if (j == ZONE_DMA && realsize > dma_reserve) {
+ /* Account for reserved pages */
+ if (j == 0 && realsize > dma_reserve) {
realsize -= dma_reserve;
- printk(KERN_DEBUG " DMA zone: %lu pages reserved\n",
- dma_reserve);
+ printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
+ zone_names[0], dma_reserve);
}
if (!is_highmem_idx(j))
@@ -2674,7 +2647,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
- zone->free_pages = 0;
zone->prev_priority = DEF_PRIORITY;
@@ -2683,8 +2655,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
INIT_LIST_HEAD(&zone->inactive_list);
zone->nr_scan_active = 0;
zone->nr_scan_inactive = 0;
- zone->nr_active = 0;
- zone->nr_inactive = 0;
zap_zone_vm_stats(zone);
atomic_set(&zone->reclaim_in_progress, 0);
if (!size)
@@ -2876,20 +2846,23 @@ static void __init sort_node_map(void)
cmp_node_active_region, NULL);
}
-/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+/* Find the lowest pfn for a node */
unsigned long __init find_min_pfn_for_node(unsigned long nid)
{
int i;
-
- /* Regions in the early_node_map can be in any order */
- sort_node_map();
+ unsigned long min_pfn = ULONG_MAX;
/* Assuming a sorted map, the first range found has the starting pfn */
for_each_active_range_index_in_nid(i, nid)
- return early_node_map[i].start_pfn;
+ min_pfn = min(min_pfn, early_node_map[i].start_pfn);
- printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
- return 0;
+ if (min_pfn == ULONG_MAX) {
+ printk(KERN_WARNING
+ "Could not find start_pfn for node %lu\n", nid);
+ return 0;
+ }
+
+ return min_pfn;
}
/**
@@ -2938,6 +2911,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
unsigned long nid;
enum zone_type i;
+ /* Sort early_node_map as initialisation assumes it is sorted */
+ sort_node_map();
+
/* Record where the zone boundaries are */
memset(arch_zone_lowest_possible_pfn, 0,
sizeof(arch_zone_lowest_possible_pfn));
diff --git a/mm/readahead.c b/mm/readahead.c
index 0f539e8e827a..93d9ee692fd8 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -575,10 +575,6 @@ void handle_ra_miss(struct address_space *mapping,
*/
unsigned long max_sane_readahead(unsigned long nr)
{
- unsigned long active;
- unsigned long inactive;
- unsigned long free;
-
- __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
- return min(nr, (inactive + free) / 2);
+ return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+ + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 70da7a0981bf..882053031aa0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -178,9 +178,9 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
static struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
-static struct inode_operations shmem_inode_operations;
-static struct inode_operations shmem_dir_inode_operations;
-static struct inode_operations shmem_special_inode_operations;
+static const struct inode_operations shmem_inode_operations;
+static const struct inode_operations shmem_dir_inode_operations;
+static const struct inode_operations shmem_special_inode_operations;
static struct vm_operations_struct shmem_vm_ops;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
@@ -1410,8 +1410,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
}
#ifdef CONFIG_TMPFS
-static struct inode_operations shmem_symlink_inode_operations;
-static struct inode_operations shmem_symlink_inline_operations;
+static const struct inode_operations shmem_symlink_inode_operations;
+static const struct inode_operations shmem_symlink_inline_operations;
/*
* Normally tmpfs makes no use of shmem_prepare_write, but it
@@ -1904,12 +1904,12 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co
}
}
-static struct inode_operations shmem_symlink_inline_operations = {
+static const struct inode_operations shmem_symlink_inline_operations = {
.readlink = generic_readlink,
.follow_link = shmem_follow_link_inline,
};
-static struct inode_operations shmem_symlink_inode_operations = {
+static const struct inode_operations shmem_symlink_inode_operations = {
.truncate = shmem_truncate,
.readlink = generic_readlink,
.follow_link = shmem_follow_link,
@@ -2316,7 +2316,7 @@ static void destroy_inodecache(void)
static const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = __set_page_dirty_no_writeback,
#ifdef CONFIG_TMPFS
.prepare_write = shmem_prepare_write,
.commit_write = simple_commit_write,
@@ -2335,7 +2335,7 @@ static const struct file_operations shmem_file_operations = {
#endif
};
-static struct inode_operations shmem_inode_operations = {
+static const struct inode_operations shmem_inode_operations = {
.truncate = shmem_truncate,
.setattr = shmem_notify_change,
.truncate_range = shmem_truncate_range,
@@ -2349,7 +2349,7 @@ static struct inode_operations shmem_inode_operations = {
};
-static struct inode_operations shmem_dir_inode_operations = {
+static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
.create = shmem_create,
.lookup = simple_lookup,
@@ -2371,7 +2371,7 @@ static struct inode_operations shmem_dir_inode_operations = {
#endif
};
-static struct inode_operations shmem_special_inode_operations = {
+static const struct inode_operations shmem_special_inode_operations = {
#ifdef CONFIG_TMPFS_POSIX_ACL
.setattr = shmem_notify_change,
.setxattr = generic_setxattr,
diff --git a/mm/slab.c b/mm/slab.c
index c6100628a6ef..70784b848b69 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -793,8 +793,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
* has cs_{dma,}cachep==NULL. Thus no special case
* for large kmalloc calls required.
*/
+#ifdef CONFIG_ZONE_DMA
if (unlikely(gfpflags & GFP_DMA))
return csizep->cs_dmacachep;
+#endif
return csizep->cs_cachep;
}
@@ -1493,13 +1495,15 @@ void __init kmem_cache_init(void)
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL, NULL);
}
-
- sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
+#ifdef CONFIG_ZONE_DMA
+ sizes->cs_dmacachep = kmem_cache_create(
+ names->name_dma,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
SLAB_PANIC,
NULL, NULL);
+#endif
sizes++;
names++;
}
@@ -2321,7 +2325,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
cachep->slab_size = slab_size;
cachep->flags = flags;
cachep->gfpflags = 0;
- if (flags & SLAB_CACHE_DMA)
+ if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
cachep->gfpflags |= GFP_DMA;
cachep->buffer_size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
@@ -2516,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
* kmem_cache_destroy - delete a cache
* @cachep: the cache to destroy
*
- * Remove a struct kmem_cache object from the slab cache.
+ * Remove a &struct kmem_cache object from the slab cache.
*
* It is expected this function will be called by a module when it is
* unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2643,10 +2647,12 @@ static void cache_init_objs(struct kmem_cache *cachep,
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
{
- if (flags & GFP_DMA)
- BUG_ON(!(cachep->gfpflags & GFP_DMA));
- else
- BUG_ON(cachep->gfpflags & GFP_DMA);
+ if (CONFIG_ZONE_DMA_FLAG) {
+ if (flags & GFP_DMA)
+ BUG_ON(!(cachep->gfpflags & GFP_DMA));
+ else
+ BUG_ON(cachep->gfpflags & GFP_DMA);
+ }
}
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
@@ -2814,19 +2820,11 @@ failed:
*/
static void kfree_debugcheck(const void *objp)
{
- struct page *page;
-
if (!virt_addr_valid(objp)) {
printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
(unsigned long)objp);
BUG();
}
- page = virt_to_page(objp);
- if (!PageSlab(page)) {
- printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
- (unsigned long)objp);
- BUG();
- }
}
static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
@@ -3197,35 +3195,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
return objp;
}
-static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
- gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
-
- if (unlikely(NUMA_BUILD &&
- current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
- objp = alternate_node_alloc(cachep, flags);
-
- if (!objp)
- objp = ____cache_alloc(cachep, flags);
- /*
- * We may just have run out of memory on the local node.
- * ____cache_alloc_node() knows how to locate memory on other nodes
- */
- if (NUMA_BUILD && !objp)
- objp = ____cache_alloc_node(cachep, flags, numa_node_id());
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
* Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
@@ -3257,14 +3226,20 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
* allocator to do its reclaim / fallback magic. We then insert the
* slab into the proper nodelist and then allocate from it.
*/
-void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
- struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
- ->node_zonelists[gfp_zone(flags)];
+ struct zonelist *zonelist;
+ gfp_t local_flags;
struct zone **z;
void *obj = NULL;
int nid;
- gfp_t local_flags = (flags & GFP_LEVEL_MASK);
+
+ if (flags & __GFP_THISNODE)
+ return NULL;
+
+ zonelist = &NODE_DATA(slab_node(current->mempolicy))
+ ->node_zonelists[gfp_zone(flags)];
+ local_flags = (flags & GFP_LEVEL_MASK);
retry:
/*
@@ -3374,16 +3349,110 @@ must_grow:
if (x)
goto retry;
- if (!(flags & __GFP_THISNODE))
- /* Unable to grow the cache. Fall back to other nodes. */
- return fallback_alloc(cachep, flags);
-
- return NULL;
+ return fallback_alloc(cachep, flags);
done:
return obj;
}
-#endif
+
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ * @caller: return address of caller, used for debug information
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
+static __always_inline void *
+__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *ptr;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+
+ if (unlikely(nodeid == -1))
+ nodeid = numa_node_id();
+
+ if (unlikely(!cachep->nodelists[nodeid])) {
+ /* Node not bootstrapped yet */
+ ptr = fallback_alloc(cachep, flags);
+ goto out;
+ }
+
+ if (nodeid == numa_node_id()) {
+ /*
+ * Use the locally cached objects if possible.
+ * However ____cache_alloc does not allow fallback
+ * to other nodes. It may fail while we still have
+ * objects on other nodes available.
+ */
+ ptr = ____cache_alloc(cachep, flags);
+ if (ptr)
+ goto out;
+ }
+ /* ___cache_alloc_node can fall back to other nodes */
+ ptr = ____cache_alloc_node(cachep, flags, nodeid);
+ out:
+ local_irq_restore(save_flags);
+ ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+
+ return ptr;
+}
+
+static __always_inline void *
+__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+ void *objp;
+
+ if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
+ objp = alternate_node_alloc(cache, flags);
+ if (objp)
+ goto out;
+ }
+ objp = ____cache_alloc(cache, flags);
+
+ /*
+ * We may just have run out of memory on the local node.
+ * ____cache_alloc_node() knows how to locate memory on other nodes
+ */
+ if (!objp)
+ objp = ____cache_alloc_node(cache, flags, numa_node_id());
+
+ out:
+ return objp;
+}
+#else
+
+static __always_inline void *
+__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+ return ____cache_alloc(cachep, flags);
+}
+
+#endif /* CONFIG_NUMA */
+
+static __always_inline void *
+__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
+{
+ unsigned long save_flags;
+ void *objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+ objp = __do_cache_alloc(cachep, flags);
+ local_irq_restore(save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ prefetchw(objp);
+
+ return objp;
+}
/*
* Caller needs to acquire correct kmem_list's list_lock
@@ -3582,57 +3651,6 @@ out:
}
#ifdef CONFIG_NUMA
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
-static __always_inline void *
-__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
- int nodeid, void *caller)
-{
- unsigned long save_flags;
- void *ptr = NULL;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (unlikely(nodeid == -1))
- nodeid = numa_node_id();
-
- if (likely(cachep->nodelists[nodeid])) {
- if (nodeid == numa_node_id()) {
- /*
- * Use the locally cached objects if possible.
- * However ____cache_alloc does not allow fallback
- * to other nodes. It may fail while we still have
- * objects on other nodes available.
- */
- ptr = ____cache_alloc(cachep, flags);
- }
- if (!ptr) {
- /* ___cache_alloc_node can fall back to other nodes */
- ptr = ____cache_alloc_node(cachep, flags, nodeid);
- }
- } else {
- /* Node not bootstrapped yet */
- if (!(flags & __GFP_THISNODE))
- ptr = fallback_alloc(cachep, flags);
- }
-
- local_irq_restore(save_flags);
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
-
- return ptr;
-}
-
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
return __cache_alloc_node(cachep, flags, nodeid,
@@ -3733,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
BUG_ON(virt_to_cache(objp) != cachep);
local_irq_save(flags);
+ debug_check_no_locks_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);
local_irq_restore(flags);
}
@@ -4017,18 +4036,17 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
* If we cannot acquire the cache chain mutex then just give up - we'll try
* again on the next iteration.
*/
-static void cache_reap(struct work_struct *unused)
+static void cache_reap(struct work_struct *w)
{
struct kmem_cache *searchp;
struct kmem_list3 *l3;
int node = numa_node_id();
+ struct delayed_work *work =
+ container_of(w, struct delayed_work, work);
- if (!mutex_trylock(&cache_chain_mutex)) {
+ if (!mutex_trylock(&cache_chain_mutex))
/* Give up. Setup the next iteration. */
- schedule_delayed_work(&__get_cpu_var(reap_work),
- round_jiffies_relative(REAPTIMEOUT_CPUC));
- return;
- }
+ goto out;
list_for_each_entry(searchp, &cache_chain, next) {
check_irq_on();
@@ -4071,9 +4089,9 @@ next:
mutex_unlock(&cache_chain_mutex);
next_reap_node();
refresh_cpu_vm_stats(smp_processor_id());
+out:
/* Set up the next iteration */
- schedule_delayed_work(&__get_cpu_var(reap_work),
- round_jiffies_relative(REAPTIMEOUT_CPUC));
+ schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
}
#ifdef CONFIG_PROC_FS
diff --git a/mm/truncate.c b/mm/truncate.c
index 5df947de7654..ebf3fcb4115b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(cancel_dirty_page);
*
* We need to bale out if page->mapping is no longer equal to the original
* mapping. This happens a) when the VM reclaimed the page while we waited on
- * its lock, b) when a concurrent invalidate_inode_pages got there first and
+ * its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
static void
@@ -106,7 +106,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
}
/*
- * This is for invalidate_inode_pages(). That function can be called at
+ * This is for invalidate_mapping_pages(). That function can be called at
* any time, and is not supposed to throw away dirty pages. But pages can
* be marked dirty at any time too, so use remove_mapping which safely
* discards clean, unused pages.
@@ -310,12 +310,7 @@ unlock:
}
return ret;
}
-
-unsigned long invalidate_inode_pages(struct address_space *mapping)
-{
- return invalidate_mapping_pages(mapping, 0, ~0UL);
-}
-EXPORT_SYMBOL(invalidate_inode_pages);
+EXPORT_SYMBOL(invalidate_mapping_pages);
/*
* This is like invalidate_complete_page(), except it ignores the page's
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 86897ee792d6..9eef486da909 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -699,7 +699,7 @@ finished:
* that it is big enough to cover the vma. Will return failure if
* that criteria isn't met.
*
- * Similar to remap_pfn_range (see mm/memory.c)
+ * Similar to remap_pfn_range() (see mm/memory.c)
*/
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7430df68cb64..0655d5fe73e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
nr_taken = isolate_lru_pages(sc->swap_cluster_max,
&zone->inactive_list,
&page_list, &nr_scan);
- zone->nr_inactive -= nr_taken;
+ __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
zone->pages_scanned += nr_scan;
spin_unlock_irq(&zone->lru_lock);
@@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
static inline int zone_is_near_oom(struct zone *zone)
{
- return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
+ return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
+ + zone_page_state(zone, NR_INACTIVE))*3;
}
/*
@@ -825,7 +826,7 @@ force_reclaim_mapped:
pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
&l_hold, &pgscanned);
zone->pages_scanned += pgscanned;
- zone->nr_active -= pgmoved;
+ __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
spin_unlock_irq(&zone->lru_lock);
while (!list_empty(&l_hold)) {
@@ -857,7 +858,7 @@ force_reclaim_mapped:
list_move(&page->lru, &zone->inactive_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
- zone->nr_inactive += pgmoved;
+ __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
spin_unlock_irq(&zone->lru_lock);
pgdeactivate += pgmoved;
pgmoved = 0;
@@ -867,7 +868,7 @@ force_reclaim_mapped:
spin_lock_irq(&zone->lru_lock);
}
}
- zone->nr_inactive += pgmoved;
+ __mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
pgdeactivate += pgmoved;
if (buffer_heads_over_limit) {
spin_unlock_irq(&zone->lru_lock);
@@ -885,14 +886,14 @@ force_reclaim_mapped:
list_move(&page->lru, &zone->active_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
- zone->nr_active += pgmoved;
+ __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
pgmoved = 0;
spin_unlock_irq(&zone->lru_lock);
__pagevec_release(&pvec);
spin_lock_irq(&zone->lru_lock);
}
}
- zone->nr_active += pgmoved;
+ __mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
__count_zone_vm_events(PGREFILL, zone, pgscanned);
__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
*/
- zone->nr_scan_active += (zone->nr_active >> priority) + 1;
+ zone->nr_scan_active +=
+ (zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
nr_active = zone->nr_scan_active;
if (nr_active >= sc->swap_cluster_max)
zone->nr_scan_active = 0;
else
nr_active = 0;
- zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
+ zone->nr_scan_inactive +=
+ (zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
nr_inactive = zone->nr_scan_inactive;
if (nr_inactive >= sc->swap_cluster_max)
zone->nr_scan_inactive = 0;
@@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- lru_pages += zone->nr_active + zone->nr_inactive;
+ lru_pages += zone_page_state(zone, NR_ACTIVE)
+ + zone_page_state(zone, NR_INACTIVE);
}
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1182,7 +1186,8 @@ loop_again:
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
- lru_pages += zone->nr_active + zone->nr_inactive;
+ lru_pages += zone_page_state(zone, NR_ACTIVE)
+ + zone_page_state(zone, NR_INACTIVE);
}
/*
@@ -1219,8 +1224,9 @@ loop_again:
if (zone->all_unreclaimable)
continue;
if (nr_slab == 0 && zone->pages_scanned >=
- (zone->nr_active + zone->nr_inactive) * 6)
- zone->all_unreclaimable = 1;
+ (zone_page_state(zone, NR_ACTIVE)
+ + zone_page_state(zone, NR_INACTIVE)) * 6)
+ zone->all_unreclaimable = 1;
/*
* If we've done a decent amount of scanning and
* the reclaim ratio is low, start doing writepage
@@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
/* For pass = 0 we don't shrink the active list */
if (pass > 0) {
- zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+ zone->nr_scan_active +=
+ (zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
if (zone->nr_scan_active >= nr_pages || pass > 3) {
zone->nr_scan_active = 0;
- nr_to_scan = min(nr_pages, zone->nr_active);
+ nr_to_scan = min(nr_pages,
+ zone_page_state(zone, NR_ACTIVE));
shrink_active_list(nr_to_scan, zone, sc, prio);
}
}
- zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+ zone->nr_scan_inactive +=
+ (zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
zone->nr_scan_inactive = 0;
- nr_to_scan = min(nr_pages, zone->nr_inactive);
+ nr_to_scan = min(nr_pages,
+ zone_page_state(zone, NR_INACTIVE));
ret += shrink_inactive_list(nr_to_scan, zone, sc);
if (ret >= nr_pages)
return ret;
@@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
static unsigned long count_lru_pages(void)
{
- struct zone *zone;
- unsigned long ret = 0;
-
- for_each_zone(zone)
- ret += zone->nr_active + zone->nr_inactive;
- return ret;
+ return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
}
/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dc005a0c96ae..6c488d6ac425 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,39 +13,6 @@
#include <linux/module.h>
#include <linux/cpu.h>
-void __get_zone_counts(unsigned long *active, unsigned long *inactive,
- unsigned long *free, struct pglist_data *pgdat)
-{
- struct zone *zones = pgdat->node_zones;
- int i;
-
- *active = 0;
- *inactive = 0;
- *free = 0;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- *active += zones[i].nr_active;
- *inactive += zones[i].nr_inactive;
- *free += zones[i].free_pages;
- }
-}
-
-void get_zone_counts(unsigned long *active,
- unsigned long *inactive, unsigned long *free)
-{
- struct pglist_data *pgdat;
-
- *active = 0;
- *inactive = 0;
- *free = 0;
- for_each_online_pgdat(pgdat) {
- unsigned long l, m, n;
- __get_zone_counts(&l, &m, &n, pgdat);
- *active += l;
- *inactive += m;
- *free += n;
- }
-}
-
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
@@ -239,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
* in between and therefore the atomicity vs. interrupt cannot be exploited
* in a useful way here.
*/
-static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +227,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
}
EXPORT_SYMBOL(__inc_zone_page_state);
-void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
- struct zone *zone = page_zone(page);
struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
s8 *p = pcp->vm_stat_diff + item;
@@ -275,6 +241,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
*p = overstep;
}
}
+
+void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ __dec_zone_state(page_zone(page), item);
+}
EXPORT_SYMBOL(__dec_zone_page_state);
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
@@ -437,6 +408,12 @@ const struct seq_operations fragmentation_op = {
.show = frag_show,
};
+#ifdef CONFIG_ZONE_DMA
+#define TEXT_FOR_DMA(xx) xx "_dma",
+#else
+#define TEXT_FOR_DMA(xx)
+#endif
+
#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
@@ -449,19 +426,22 @@ const struct seq_operations fragmentation_op = {
#define TEXT_FOR_HIGHMEM(xx)
#endif
-#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \
+#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
TEXT_FOR_HIGHMEM(xx)
static const char * const vmstat_text[] = {
/* Zoned VM counters */
+ "nr_free_pages",
+ "nr_active",
+ "nr_inactive",
"nr_anon_pages",
"nr_mapped",
"nr_file_pages",
+ "nr_dirty",
+ "nr_writeback",
"nr_slab_reclaimable",
"nr_slab_unreclaimable",
"nr_page_table_pages",
- "nr_dirty",
- "nr_writeback",
"nr_unstable",
"nr_bounce",
"nr_vmscan_write",
@@ -529,17 +509,13 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
"\n min %lu"
"\n low %lu"
"\n high %lu"
- "\n active %lu"
- "\n inactive %lu"
"\n scanned %lu (a: %lu i: %lu)"
"\n spanned %lu"
"\n present %lu",
- zone->free_pages,
+ zone_page_state(zone, NR_FREE_PAGES),
zone->pages_min,
zone->pages_low,
zone->pages_high,
- zone->nr_active,
- zone->nr_inactive,
zone->pages_scanned,
zone->nr_scan_active, zone->nr_scan_inactive,
zone->spanned_pages,
@@ -563,12 +539,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
pageset = zone_pcp(zone, i);
for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
- if (pageset->pcp[j].count)
- break;
- }
- if (j == ARRAY_SIZE(pageset->pcp))
- continue;
- for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
seq_printf(m,
"\n cpu: %i pcp: %i"
"\n count: %i"