diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-02-13 09:43:25 +0300 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-02-13 09:43:25 +0300 |
commit | d9bc125caf592b7d081021f32ce5b717efdf70c8 (patch) | |
tree | 263b7066ba22ddce21db610c0300f6eaac6f2064 /mm | |
parent | 43d78ef2ba5bec26d0315859e8324bfc0be23766 (diff) | |
parent | ec2f9d1331f658433411c58077871e1eef4ee1b4 (diff) | |
download | linux-d9bc125caf592b7d081021f32ce5b717efdf70c8.tar.xz |
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Conflicts:
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_spkm3_token.c
net/sunrpc/clnt.c
Merge with mainline and fix conflicts.
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 6 | ||||
-rw-r--r-- | mm/filemap.c | 24 | ||||
-rw-r--r-- | mm/highmem.c | 3 | ||||
-rw-r--r-- | mm/hugetlb.c | 2 | ||||
-rw-r--r-- | mm/memory.c | 60 | ||||
-rw-r--r-- | mm/mempolicy.c | 2 | ||||
-rw-r--r-- | mm/mempool.c | 6 | ||||
-rw-r--r-- | mm/mincore.c | 102 | ||||
-rw-r--r-- | mm/mmap.c | 72 | ||||
-rw-r--r-- | mm/page-writeback.c | 17 | ||||
-rw-r--r-- | mm/page_alloc.c | 102 | ||||
-rw-r--r-- | mm/readahead.c | 8 | ||||
-rw-r--r-- | mm/shmem.c | 22 | ||||
-rw-r--r-- | mm/slab.c | 246 | ||||
-rw-r--r-- | mm/truncate.c | 11 | ||||
-rw-r--r-- | mm/vmalloc.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 51 | ||||
-rw-r--r-- | mm/vmstat.c | 70 |
18 files changed, 464 insertions, 342 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index db7c55de92cd..7942b333e46c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -157,3 +157,9 @@ config RESOURCES_64BIT default 64BIT help This option allows memory and IO resources to be 64 bit. + +config ZONE_DMA_FLAG + int + default "0" if !ZONE_DMA + default "1" + diff --git a/mm/filemap.c b/mm/filemap.c index 8332c77b1bd1..00414849a867 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -327,7 +327,7 @@ EXPORT_SYMBOL(sync_page_range); * @pos: beginning offset in pages to write * @count: number of bytes to write * - * Note: Holding i_mutex across sync_page_range_nolock is not a good idea + * Note: Holding i_mutex across sync_page_range_nolock() is not a good idea * as it forces O_SYNC writers to different parts of the same file * to be serialised right until io completion. */ @@ -606,26 +606,6 @@ struct page * find_get_page(struct address_space *mapping, unsigned long offset) EXPORT_SYMBOL(find_get_page); /** - * find_trylock_page - find and lock a page - * @mapping: the address_space to search - * @offset: the page index - * - * Same as find_get_page(), but trylock it instead of incrementing the count. - */ -struct page *find_trylock_page(struct address_space *mapping, unsigned long offset) -{ - struct page *page; - - read_lock_irq(&mapping->tree_lock); - page = radix_tree_lookup(&mapping->page_tree, offset); - if (page && TestSetPageLocked(page)) - page = NULL; - read_unlock_irq(&mapping->tree_lock); - return page; -} -EXPORT_SYMBOL(find_trylock_page); - -/** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search * @offset: the page index @@ -804,7 +784,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, * @mapping: target address_space * @index: the page index * - * Same as grab_cache_page, but do not wait if the page is unavailable. + * Same as grab_cache_page(), but do not wait if the page is unavailable. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. diff --git a/mm/highmem.c b/mm/highmem.c index 0206e7e5018c..51e1c1995fec 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -47,7 +47,8 @@ unsigned int nr_free_highpages (void) unsigned int pages = 0; for_each_online_pgdat(pgdat) - pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; + pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], + NR_FREE_PAGES); return pages; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cb362f761f17..36db012b38dd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -389,6 +389,8 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, continue; page = pte_page(pte); + if (pte_dirty(pte)) + set_page_dirty(page); list_add(&page->lru, &page_list); } spin_unlock(&mm->page_table_lock); diff --git a/mm/memory.c b/mm/memory.c index ef09f0acb1d8..e7066e71dfa3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -678,7 +678,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, if (pte_dirty(ptent)) set_page_dirty(page); if (pte_young(ptent)) - mark_page_accessed(page); + SetPageReferenced(page); file_rss--; } page_remove_rmap(page, vma); @@ -1277,6 +1277,51 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page * } EXPORT_SYMBOL(vm_insert_page); +/** + * vm_insert_pfn - insert single pfn into user vma + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * + * Similar to vm_inert_page, this allows drivers to insert individual pages + * they've allocated into a user vma. Same comments apply. + * + * This function should only be called from a vm_ops->fault handler, and + * in that case the handler should return NULL. + */ +int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn) +{ + struct mm_struct *mm = vma->vm_mm; + int retval; + pte_t *pte, entry; + spinlock_t *ptl; + + BUG_ON(!(vma->vm_flags & VM_PFNMAP)); + BUG_ON(is_cow_mapping(vma->vm_flags)); + + retval = -ENOMEM; + pte = get_locked_pte(mm, addr, &ptl); + if (!pte) + goto out; + retval = -EBUSY; + if (!pte_none(*pte)) + goto out_unlock; + + /* Ok, finally just insert the thing.. */ + entry = pfn_pte(pfn, vma->vm_page_prot); + set_pte_at(mm, addr, pte, entry); + update_mmu_cache(vma, addr, entry); + + retval = 0; +out_unlock: + pte_unmap_unlock(pte, ptl); + +out: + return retval; +} +EXPORT_SYMBOL(vm_insert_pfn); + /* * maps a range of physical memory into the requested pages. the old * mappings are removed. any references to nonexistent pages results @@ -1531,8 +1576,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_ops->page_mkwrite(vma, old_page) < 0) goto unwritable_page; - page_cache_release(old_page); - /* * Since we dropped the lock we need to revalidate * the PTE as someone else may have changed it. If @@ -1541,6 +1584,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, */ page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + page_cache_release(old_page); if (!pte_same(*page_table, orig_pte)) goto unlock; } @@ -1776,9 +1820,7 @@ restart: } /** - * unmap_mapping_range - unmap the portion of all mmaps - * in the specified address_space corresponding to the specified - * page range in the underlying file. + * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file. * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE @@ -2313,10 +2355,12 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, BUG_ON(is_cow_mapping(vma->vm_flags)); pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); - if (pfn == NOPFN_OOM) + if (unlikely(pfn == NOPFN_OOM)) return VM_FAULT_OOM; - if (pfn == NOPFN_SIGBUS) + else if (unlikely(pfn == NOPFN_SIGBUS)) return VM_FAULT_SIGBUS; + else if (unlikely(pfn == NOPFN_REFAULT)) + return VM_FAULT_MINOR; page_table = pte_offset_map_lock(mm, pmd, address, &ptl); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c2aec0e1090d..259a706bd83e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -105,7 +105,7 @@ static struct kmem_cache *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. */ -enum zone_type policy_zone = ZONE_DMA; +enum zone_type policy_zone = 0; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ diff --git a/mm/mempool.c b/mm/mempool.c index ccd8cb8cd41f..cc1ca86dfc24 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -46,9 +46,9 @@ static void free_pool(mempool_t *pool) * @pool_data: optional private data available to the user-defined functions. * * this function creates and allocates a guaranteed size, preallocated - * memory pool. The pool can be used from the mempool_alloc and mempool_free + * memory pool. The pool can be used from the mempool_alloc() and mempool_free() * functions. This function might sleep. Both the alloc_fn() and the free_fn() - * functions might sleep - as long as the mempool_alloc function is not called + * functions might sleep - as long as the mempool_alloc() function is not called * from IRQ contexts. */ mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, @@ -195,7 +195,7 @@ EXPORT_SYMBOL(mempool_destroy); * mempool_create(). * @gfp_mask: the usual allocation bitmask. * - * this function only sleeps if the alloc_fn function sleeps or + * this function only sleeps if the alloc_fn() function sleeps or * returns NULL. Note that due to preallocation, this function * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) diff --git a/mm/mincore.c b/mm/mincore.c index 8aca6f7167bb..95c5f49f0a1a 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -12,6 +12,8 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/syscalls.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -22,14 +24,22 @@ * and is up to date; i.e. that no page-in operation would be required * at this time if an application were to map and access this page. */ -static unsigned char mincore_page(struct vm_area_struct * vma, - unsigned long pgoff) +static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) { unsigned char present = 0; - struct address_space * as = vma->vm_file->f_mapping; - struct page * page; + struct page *page; - page = find_get_page(as, pgoff); + /* + * When tmpfs swaps out a page from a file, any process mapping that + * file will not get a swp_entry_t in its pte, but rather it is like + * any other file mapping (ie. marked !present and faulted in with + * tmpfs's .nopage). So swapped out tmpfs mappings are tested here. + * + * However when tmpfs moves the page from pagecache and into swapcache, + * it is still in core, but the find_get_page below won't find it. + * No big deal, but make a note of it. + */ + page = find_get_page(mapping, pgoff); if (page) { present = PageUptodate(page); page_cache_release(page); @@ -45,7 +55,14 @@ static unsigned char mincore_page(struct vm_area_struct * vma, */ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) { - unsigned long i, nr, pgoff; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + spinlock_t *ptl; + unsigned long nr; + int i; + pgoff_t pgoff; struct vm_area_struct *vma = find_vma(current->mm, addr); /* @@ -56,31 +73,64 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag return -ENOMEM; /* - * Ok, got it. But check whether it's a segment we support - * mincore() on. Right now, we don't do any anonymous mappings. - * - * FIXME: This is just stupid. And returning ENOMEM is - * stupid too. We should just look at the page tables. But - * this is what we've traditionally done, so we'll just - * continue doing it. + * Calculate how many pages there are left in the last level of the + * PTE array for our address. */ - if (!vma->vm_file) - return -ENOMEM; - - /* - * Calculate how many pages there are left in the vma, and - * what the pgoff is for our address. - */ - nr = (vma->vm_end - addr) >> PAGE_SHIFT; + nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1)); if (nr > pages) nr = pages; - pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; - pgoff += vma->vm_pgoff; + pgd = pgd_offset(vma->vm_mm, addr); + if (pgd_none_or_clear_bad(pgd)) + goto none_mapped; + pud = pud_offset(pgd, addr); + if (pud_none_or_clear_bad(pud)) + goto none_mapped; + pmd = pmd_offset(pud, addr); + if (pmd_none_or_clear_bad(pmd)) + goto none_mapped; + + ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) { + unsigned char present; + pte_t pte = *ptep; + + if (pte_present(pte)) { + present = 1; + + } else if (pte_none(pte)) { + if (vma->vm_file) { + pgoff = linear_page_index(vma, addr); + present = mincore_page(vma->vm_file->f_mapping, + pgoff); + } else + present = 0; + + } else if (pte_file(pte)) { + pgoff = pte_to_pgoff(pte); + present = mincore_page(vma->vm_file->f_mapping, pgoff); + + } else { /* pte is a swap entry */ + swp_entry_t entry = pte_to_swp_entry(pte); + if (is_migration_entry(entry)) { + /* migration entries are always uptodate */ + present = 1; + } else { + pgoff = entry.val; + present = mincore_page(&swapper_space, pgoff); + } + } + } + pte_unmap_unlock(ptep-1, ptl); + + return nr; - /* And then we just fill the sucker in.. */ - for (i = 0 ; i < nr; i++, pgoff++) - vec[i] = mincore_page(vma, pgoff); +none_mapped: + if (vma->vm_file) { + pgoff = linear_page_index(vma, addr); + for (i = 0; i < nr; i++, pgoff++) + vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); + } return nr; } diff --git a/mm/mmap.c b/mm/mmap.c index cc3a20819457..eb509ae76553 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2101,3 +2101,75 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages) return 0; return 1; } + + +static struct page *special_mapping_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + struct page **pages; + + BUG_ON(address < vma->vm_start || address >= vma->vm_end); + + address -= vma->vm_start; + for (pages = vma->vm_private_data; address > 0 && *pages; ++pages) + address -= PAGE_SIZE; + + if (*pages) { + struct page *page = *pages; + get_page(page); + return page; + } + + return NOPAGE_SIGBUS; +} + +/* + * Having a close hook prevents vma merging regardless of flags. + */ +static void special_mapping_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct special_mapping_vmops = { + .close = special_mapping_close, + .nopage = special_mapping_nopage, +}; + +/* + * Called with mm->mmap_sem held for writing. + * Insert a new vma covering the given region, with the given flags. + * Its pages are supplied by the given array of struct page *. + * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. + * The region past the last page supplied will always produce SIGBUS. + * The array pointer and the pages it points to are assumed to stay alive + * for as long as this mapping might exist. + */ +int install_special_mapping(struct mm_struct *mm, + unsigned long addr, unsigned long len, + unsigned long vm_flags, struct page **pages) +{ + struct vm_area_struct *vma; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + if (unlikely(vma == NULL)) + return -ENOMEM; + + vma->vm_mm = mm; + vma->vm_start = addr; + vma->vm_end = addr + len; + + vma->vm_flags = vm_flags | mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + + vma->vm_ops = &special_mapping_vmops; + vma->vm_private_data = pages; + + if (unlikely(insert_vm_struct(mm, vma))) { + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } + + mm->total_vm += len >> PAGE_SHIFT; + + return 0; +} diff --git a/mm/page-writeback.c b/mm/page-writeback.c index be0efbde4994..f7e088f5a309 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -515,7 +515,7 @@ static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { writeback_set_ratelimit(); - return 0; + return NOTIFY_DONE; } static struct notifier_block __cpuinitdata ratelimit_nb = { @@ -549,9 +549,7 @@ void __init page_writeback_init(void) } /** - * generic_writepages - walk the list of dirty pages of the given - * address space and writepage() all of them. - * + * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @@ -698,7 +696,6 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) /** * write_one_page - write out a single page and optionally wait on I/O - * * @page: the page to write * @wait: if true, wait on writeout * @@ -737,6 +734,16 @@ int write_one_page(struct page *page, int wait) EXPORT_SYMBOL(write_one_page); /* + * For address_spaces which do not use buffers nor write back. + */ +int __set_page_dirty_no_writeback(struct page *page) +{ + if (!PageDirty(page)) + SetPageDirty(page); + return 0; +} + +/* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. * diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c606cc922a5..d461b23a27a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -73,7 +73,9 @@ static void __free_pages_ok(struct page *page, unsigned int order); * don't need any ZONE_NORMAL reservation */ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { +#ifdef CONFIG_ZONE_DMA 256, +#endif #ifdef CONFIG_ZONE_DMA32 256, #endif @@ -85,7 +87,9 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); static char * const zone_names[MAX_NR_ZONES] = { +#ifdef CONFIG_ZONE_DMA "DMA", +#endif #ifdef CONFIG_ZONE_DMA32 "DMA32", #endif @@ -395,7 +399,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON(page_idx & (order_size - 1)); VM_BUG_ON(bad_range(zone, page)); - zone->free_pages += order_size; + __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); while (order < MAX_ORDER-1) { unsigned long combined_idx; struct free_area *area; @@ -631,7 +635,7 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order) list_del(&page->lru); rmv_page_order(page); area->nr_free--; - zone->free_pages -= 1UL << order; + __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order)); expand(zone, page, order, current_order, area); return page; } @@ -989,7 +993,8 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags) { /* free_pages my go negative - that's OK */ - long min = mark, free_pages = z->free_pages - (1 << order) + 1; + long min = mark; + long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -1439,35 +1444,6 @@ fastcall void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); -/* - * Total amount of free (allocatable) RAM: - */ -unsigned int nr_free_pages(void) -{ - unsigned int sum = 0; - struct zone *zone; - - for_each_zone(zone) - sum += zone->free_pages; - - return sum; -} - -EXPORT_SYMBOL(nr_free_pages); - -#ifdef CONFIG_NUMA -unsigned int nr_free_pages_pgdat(pg_data_t *pgdat) -{ - unsigned int sum = 0; - enum zone_type i; - - for (i = 0; i < MAX_NR_ZONES; i++) - sum += pgdat->node_zones[i].free_pages; - - return sum; -} -#endif - static unsigned int nr_free_zone_pages(int offset) { /* Just pick one node, since fallback list is circular */ @@ -1514,7 +1490,7 @@ void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages; val->sharedram = 0; - val->freeram = nr_free_pages(); + val->freeram = global_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); val->totalhigh = totalhigh_pages; val->freehigh = nr_free_highpages(); @@ -1529,10 +1505,11 @@ void si_meminfo_node(struct sysinfo *val, int nid) pg_data_t *pgdat = NODE_DATA(nid); val->totalram = pgdat->node_present_pages; - val->freeram = nr_free_pages_pgdat(pgdat); + val->freeram = node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; - val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; + val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], + NR_FREE_PAGES); #else val->totalhigh = 0; val->freehigh = 0; @@ -1551,9 +1528,6 @@ void si_meminfo_node(struct sysinfo *val, int nid) void show_free_areas(void) { int cpu; - unsigned long active; - unsigned long inactive; - unsigned long free; struct zone *zone; for_each_zone(zone) { @@ -1577,20 +1551,19 @@ void show_free_areas(void) } } - get_zone_counts(&active, &inactive, &free); - - printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu " - "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", - active, - inactive, + printk("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu\n" + " free:%lu slab:%lu mapped:%lu pagetables:%lu bounce:%lu\n", + global_page_state(NR_ACTIVE), + global_page_state(NR_INACTIVE), global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), - nr_free_pages(), + global_page_state(NR_FREE_PAGES), global_page_state(NR_SLAB_RECLAIMABLE) + global_page_state(NR_SLAB_UNRECLAIMABLE), global_page_state(NR_FILE_MAPPED), - global_page_state(NR_PAGETABLE)); + global_page_state(NR_PAGETABLE), + global_page_state(NR_BOUNCE)); for_each_zone(zone) { int i; @@ -1611,12 +1584,12 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone->free_pages), + K(zone_page_state(zone, NR_FREE_PAGES)), K(zone->pages_min), K(zone->pages_low), K(zone->pages_high), - K(zone->nr_active), - K(zone->nr_inactive), + K(zone_page_state(zone, NR_ACTIVE)), + K(zone_page_state(zone, NR_INACTIVE)), K(zone->present_pages), zone->pages_scanned, (zone->all_unreclaimable ? "yes" : "no") @@ -2650,11 +2623,11 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, " %s zone: %lu pages exceeds realsize %lu\n", zone_names[j], memmap_pages, realsize); - /* Account for reserved DMA pages */ - if (j == ZONE_DMA && realsize > dma_reserve) { + /* Account for reserved pages */ + if (j == 0 && realsize > dma_reserve) { realsize -= dma_reserve; - printk(KERN_DEBUG " DMA zone: %lu pages reserved\n", - dma_reserve); + printk(KERN_DEBUG " %s zone: %lu pages reserved\n", + zone_names[0], dma_reserve); } if (!is_highmem_idx(j)) @@ -2674,7 +2647,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, spin_lock_init(&zone->lru_lock); zone_seqlock_init(zone); zone->zone_pgdat = pgdat; - zone->free_pages = 0; zone->prev_priority = DEF_PRIORITY; @@ -2683,8 +2655,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, INIT_LIST_HEAD(&zone->inactive_list); zone->nr_scan_active = 0; zone->nr_scan_inactive = 0; - zone->nr_active = 0; - zone->nr_inactive = 0; zap_zone_vm_stats(zone); atomic_set(&zone->reclaim_in_progress, 0); if (!size) @@ -2876,20 +2846,23 @@ static void __init sort_node_map(void) cmp_node_active_region, NULL); } -/* Find the lowest pfn for a node. This depends on a sorted early_node_map */ +/* Find the lowest pfn for a node */ unsigned long __init find_min_pfn_for_node(unsigned long nid) { int i; - - /* Regions in the early_node_map can be in any order */ - sort_node_map(); + unsigned long min_pfn = ULONG_MAX; /* Assuming a sorted map, the first range found has the starting pfn */ for_each_active_range_index_in_nid(i, nid) - return early_node_map[i].start_pfn; + min_pfn = min(min_pfn, early_node_map[i].start_pfn); - printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid); - return 0; + if (min_pfn == ULONG_MAX) { + printk(KERN_WARNING + "Could not find start_pfn for node %lu\n", nid); + return 0; + } + + return min_pfn; } /** @@ -2938,6 +2911,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) unsigned long nid; enum zone_type i; + /* Sort early_node_map as initialisation assumes it is sorted */ + sort_node_map(); + /* Record where the zone boundaries are */ memset(arch_zone_lowest_possible_pfn, 0, sizeof(arch_zone_lowest_possible_pfn)); diff --git a/mm/readahead.c b/mm/readahead.c index 0f539e8e827a..93d9ee692fd8 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -575,10 +575,6 @@ void handle_ra_miss(struct address_space *mapping, */ unsigned long max_sane_readahead(unsigned long nr) { - unsigned long active; - unsigned long inactive; - unsigned long free; - - __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id())); - return min(nr, (inactive + free) / 2); + return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE) + + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); } diff --git a/mm/shmem.c b/mm/shmem.c index 70da7a0981bf..882053031aa0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -178,9 +178,9 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) static struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; -static struct inode_operations shmem_inode_operations; -static struct inode_operations shmem_dir_inode_operations; -static struct inode_operations shmem_special_inode_operations; +static const struct inode_operations shmem_inode_operations; +static const struct inode_operations shmem_dir_inode_operations; +static const struct inode_operations shmem_special_inode_operations; static struct vm_operations_struct shmem_vm_ops; static struct backing_dev_info shmem_backing_dev_info __read_mostly = { @@ -1410,8 +1410,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) } #ifdef CONFIG_TMPFS -static struct inode_operations shmem_symlink_inode_operations; -static struct inode_operations shmem_symlink_inline_operations; +static const struct inode_operations shmem_symlink_inode_operations; +static const struct inode_operations shmem_symlink_inline_operations; /* * Normally tmpfs makes no use of shmem_prepare_write, but it @@ -1904,12 +1904,12 @@ static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *co } } -static struct inode_operations shmem_symlink_inline_operations = { +static const struct inode_operations shmem_symlink_inline_operations = { .readlink = generic_readlink, .follow_link = shmem_follow_link_inline, }; -static struct inode_operations shmem_symlink_inode_operations = { +static const struct inode_operations shmem_symlink_inode_operations = { .truncate = shmem_truncate, .readlink = generic_readlink, .follow_link = shmem_follow_link, @@ -2316,7 +2316,7 @@ static void destroy_inodecache(void) static const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = __set_page_dirty_no_writeback, #ifdef CONFIG_TMPFS .prepare_write = shmem_prepare_write, .commit_write = simple_commit_write, @@ -2335,7 +2335,7 @@ static const struct file_operations shmem_file_operations = { #endif }; -static struct inode_operations shmem_inode_operations = { +static const struct inode_operations shmem_inode_operations = { .truncate = shmem_truncate, .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, @@ -2349,7 +2349,7 @@ static struct inode_operations shmem_inode_operations = { }; -static struct inode_operations shmem_dir_inode_operations = { +static const struct inode_operations shmem_dir_inode_operations = { #ifdef CONFIG_TMPFS .create = shmem_create, .lookup = simple_lookup, @@ -2371,7 +2371,7 @@ static struct inode_operations shmem_dir_inode_operations = { #endif }; -static struct inode_operations shmem_special_inode_operations = { +static const struct inode_operations shmem_special_inode_operations = { #ifdef CONFIG_TMPFS_POSIX_ACL .setattr = shmem_notify_change, .setxattr = generic_setxattr, diff --git a/mm/slab.c b/mm/slab.c index c6100628a6ef..70784b848b69 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -793,8 +793,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, * has cs_{dma,}cachep==NULL. Thus no special case * for large kmalloc calls required. */ +#ifdef CONFIG_ZONE_DMA if (unlikely(gfpflags & GFP_DMA)) return csizep->cs_dmacachep; +#endif return csizep->cs_cachep; } @@ -1493,13 +1495,15 @@ void __init kmem_cache_init(void) ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL, NULL); } - - sizes->cs_dmacachep = kmem_cache_create(names->name_dma, +#ifdef CONFIG_ZONE_DMA + sizes->cs_dmacachep = kmem_cache_create( + names->name_dma, sizes->cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC, NULL, NULL); +#endif sizes++; names++; } @@ -2321,7 +2325,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->slab_size = slab_size; cachep->flags = flags; cachep->gfpflags = 0; - if (flags & SLAB_CACHE_DMA) + if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) cachep->gfpflags |= GFP_DMA; cachep->buffer_size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); @@ -2516,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * kmem_cache_destroy - delete a cache * @cachep: the cache to destroy * - * Remove a struct kmem_cache object from the slab cache. + * Remove a &struct kmem_cache object from the slab cache. * * It is expected this function will be called by a module when it is * unloaded. This will remove the cache completely, and avoid a duplicate @@ -2643,10 +2647,12 @@ static void cache_init_objs(struct kmem_cache *cachep, static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) { - if (flags & GFP_DMA) - BUG_ON(!(cachep->gfpflags & GFP_DMA)); - else - BUG_ON(cachep->gfpflags & GFP_DMA); + if (CONFIG_ZONE_DMA_FLAG) { + if (flags & GFP_DMA) + BUG_ON(!(cachep->gfpflags & GFP_DMA)); + else + BUG_ON(cachep->gfpflags & GFP_DMA); + } } static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, @@ -2814,19 +2820,11 @@ failed: */ static void kfree_debugcheck(const void *objp) { - struct page *page; - if (!virt_addr_valid(objp)) { printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n", (unsigned long)objp); BUG(); } - page = virt_to_page(objp); - if (!PageSlab(page)) { - printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", - (unsigned long)objp); - BUG(); - } } static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) @@ -3197,35 +3195,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) return objp; } -static __always_inline void *__cache_alloc(struct kmem_cache *cachep, - gfp_t flags, void *caller) -{ - unsigned long save_flags; - void *objp = NULL; - - cache_alloc_debugcheck_before(cachep, flags); - - local_irq_save(save_flags); - - if (unlikely(NUMA_BUILD && - current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) - objp = alternate_node_alloc(cachep, flags); - - if (!objp) - objp = ____cache_alloc(cachep, flags); - /* - * We may just have run out of memory on the local node. - * ____cache_alloc_node() knows how to locate memory on other nodes - */ - if (NUMA_BUILD && !objp) - objp = ____cache_alloc_node(cachep, flags, numa_node_id()); - local_irq_restore(save_flags); - objp = cache_alloc_debugcheck_after(cachep, flags, objp, - caller); - prefetchw(objp); - return objp; -} - #ifdef CONFIG_NUMA /* * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. @@ -3257,14 +3226,20 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) * allocator to do its reclaim / fallback magic. We then insert the * slab into the proper nodelist and then allocate from it. */ -void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) +static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) { - struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) - ->node_zonelists[gfp_zone(flags)]; + struct zonelist *zonelist; + gfp_t local_flags; struct zone **z; void *obj = NULL; int nid; - gfp_t local_flags = (flags & GFP_LEVEL_MASK); + + if (flags & __GFP_THISNODE) + return NULL; + + zonelist = &NODE_DATA(slab_node(current->mempolicy)) + ->node_zonelists[gfp_zone(flags)]; + local_flags = (flags & GFP_LEVEL_MASK); retry: /* @@ -3374,16 +3349,110 @@ must_grow: if (x) goto retry; - if (!(flags & __GFP_THISNODE)) - /* Unable to grow the cache. Fall back to other nodes. */ - return fallback_alloc(cachep, flags); - - return NULL; + return fallback_alloc(cachep, flags); done: return obj; } -#endif + +/** + * kmem_cache_alloc_node - Allocate an object on the specified node + * @cachep: The cache to allocate from. + * @flags: See kmalloc(). + * @nodeid: node number of the target node. + * @caller: return address of caller, used for debug information + * + * Identical to kmem_cache_alloc but it will allocate memory on the given + * node, which can improve the performance for cpu bound structures. + * + * Fallback to other node is possible if __GFP_THISNODE is not set. + */ +static __always_inline void * +__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, + void *caller) +{ + unsigned long save_flags; + void *ptr; + + cache_alloc_debugcheck_before(cachep, flags); + local_irq_save(save_flags); + + if (unlikely(nodeid == -1)) + nodeid = numa_node_id(); + + if (unlikely(!cachep->nodelists[nodeid])) { + /* Node not bootstrapped yet */ + ptr = fallback_alloc(cachep, flags); + goto out; + } + + if (nodeid == numa_node_id()) { + /* + * Use the locally cached objects if possible. + * However ____cache_alloc does not allow fallback + * to other nodes. It may fail while we still have + * objects on other nodes available. + */ + ptr = ____cache_alloc(cachep, flags); + if (ptr) + goto out; + } + /* ___cache_alloc_node can fall back to other nodes */ + ptr = ____cache_alloc_node(cachep, flags, nodeid); + out: + local_irq_restore(save_flags); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + + return ptr; +} + +static __always_inline void * +__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) +{ + void *objp; + + if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { + objp = alternate_node_alloc(cache, flags); + if (objp) + goto out; + } + objp = ____cache_alloc(cache, flags); + + /* + * We may just have run out of memory on the local node. + * ____cache_alloc_node() knows how to locate memory on other nodes + */ + if (!objp) + objp = ____cache_alloc_node(cache, flags, numa_node_id()); + + out: + return objp; +} +#else + +static __always_inline void * +__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +{ + return ____cache_alloc(cachep, flags); +} + +#endif /* CONFIG_NUMA */ + +static __always_inline void * +__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) +{ + unsigned long save_flags; + void *objp; + + cache_alloc_debugcheck_before(cachep, flags); + local_irq_save(save_flags); + objp = __do_cache_alloc(cachep, flags); + local_irq_restore(save_flags); + objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); + prefetchw(objp); + + return objp; +} /* * Caller needs to acquire correct kmem_list's list_lock @@ -3582,57 +3651,6 @@ out: } #ifdef CONFIG_NUMA -/** - * kmem_cache_alloc_node - Allocate an object on the specified node - * @cachep: The cache to allocate from. - * @flags: See kmalloc(). - * @nodeid: node number of the target node. - * @caller: return address of caller, used for debug information - * - * Identical to kmem_cache_alloc but it will allocate memory on the given - * node, which can improve the performance for cpu bound structures. - * - * Fallback to other node is possible if __GFP_THISNODE is not set. - */ -static __always_inline void * -__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, - int nodeid, void *caller) -{ - unsigned long save_flags; - void *ptr = NULL; - - cache_alloc_debugcheck_before(cachep, flags); - local_irq_save(save_flags); - - if (unlikely(nodeid == -1)) - nodeid = numa_node_id(); - - if (likely(cachep->nodelists[nodeid])) { - if (nodeid == numa_node_id()) { - /* - * Use the locally cached objects if possible. - * However ____cache_alloc does not allow fallback - * to other nodes. It may fail while we still have - * objects on other nodes available. - */ - ptr = ____cache_alloc(cachep, flags); - } - if (!ptr) { - /* ___cache_alloc_node can fall back to other nodes */ - ptr = ____cache_alloc_node(cachep, flags, nodeid); - } - } else { - /* Node not bootstrapped yet */ - if (!(flags & __GFP_THISNODE)) - ptr = fallback_alloc(cachep, flags); - } - - local_irq_restore(save_flags); - ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - - return ptr; -} - void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { return __cache_alloc_node(cachep, flags, nodeid, @@ -3733,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) BUG_ON(virt_to_cache(objp) != cachep); local_irq_save(flags); + debug_check_no_locks_freed(objp, obj_size(cachep)); __cache_free(cachep, objp); local_irq_restore(flags); } @@ -4017,18 +4036,17 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, * If we cannot acquire the cache chain mutex then just give up - we'll try * again on the next iteration. */ -static void cache_reap(struct work_struct *unused) +static void cache_reap(struct work_struct *w) { struct kmem_cache *searchp; struct kmem_list3 *l3; int node = numa_node_id(); + struct delayed_work *work = + container_of(w, struct delayed_work, work); - if (!mutex_trylock(&cache_chain_mutex)) { + if (!mutex_trylock(&cache_chain_mutex)) /* Give up. Setup the next iteration. */ - schedule_delayed_work(&__get_cpu_var(reap_work), - round_jiffies_relative(REAPTIMEOUT_CPUC)); - return; - } + goto out; list_for_each_entry(searchp, &cache_chain, next) { check_irq_on(); @@ -4071,9 +4089,9 @@ next: mutex_unlock(&cache_chain_mutex); next_reap_node(); refresh_cpu_vm_stats(smp_processor_id()); +out: /* Set up the next iteration */ - schedule_delayed_work(&__get_cpu_var(reap_work), - round_jiffies_relative(REAPTIMEOUT_CPUC)); + schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); } #ifdef CONFIG_PROC_FS diff --git a/mm/truncate.c b/mm/truncate.c index 5df947de7654..ebf3fcb4115b 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(cancel_dirty_page); * * We need to bale out if page->mapping is no longer equal to the original * mapping. This happens a) when the VM reclaimed the page while we waited on - * its lock, b) when a concurrent invalidate_inode_pages got there first and + * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ static void @@ -106,7 +106,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) } /* - * This is for invalidate_inode_pages(). That function can be called at + * This is for invalidate_mapping_pages(). That function can be called at * any time, and is not supposed to throw away dirty pages. But pages can * be marked dirty at any time too, so use remove_mapping which safely * discards clean, unused pages. @@ -310,12 +310,7 @@ unlock: } return ret; } - -unsigned long invalidate_inode_pages(struct address_space *mapping) -{ - return invalidate_mapping_pages(mapping, 0, ~0UL); -} -EXPORT_SYMBOL(invalidate_inode_pages); +EXPORT_SYMBOL(invalidate_mapping_pages); /* * This is like invalidate_complete_page(), except it ignores the page's diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 86897ee792d6..9eef486da909 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -699,7 +699,7 @@ finished: * that it is big enough to cover the vma. Will return failure if * that criteria isn't met. * - * Similar to remap_pfn_range (see mm/memory.c) + * Similar to remap_pfn_range() (see mm/memory.c) */ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7430df68cb64..0655d5fe73e8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, nr_taken = isolate_lru_pages(sc->swap_cluster_max, &zone->inactive_list, &page_list, &nr_scan); - zone->nr_inactive -= nr_taken; + __mod_zone_page_state(zone, NR_INACTIVE, -nr_taken); zone->pages_scanned += nr_scan; spin_unlock_irq(&zone->lru_lock); @@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority) static inline int zone_is_near_oom(struct zone *zone) { - return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; + return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) + + zone_page_state(zone, NR_INACTIVE))*3; } /* @@ -825,7 +826,7 @@ force_reclaim_mapped: pgmoved = isolate_lru_pages(nr_pages, &zone->active_list, &l_hold, &pgscanned); zone->pages_scanned += pgscanned; - zone->nr_active -= pgmoved; + __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved); spin_unlock_irq(&zone->lru_lock); while (!list_empty(&l_hold)) { @@ -857,7 +858,7 @@ force_reclaim_mapped: list_move(&page->lru, &zone->inactive_list); pgmoved++; if (!pagevec_add(&pvec, page)) { - zone->nr_inactive += pgmoved; + __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); spin_unlock_irq(&zone->lru_lock); pgdeactivate += pgmoved; pgmoved = 0; @@ -867,7 +868,7 @@ force_reclaim_mapped: spin_lock_irq(&zone->lru_lock); } } - zone->nr_inactive += pgmoved; + __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); pgdeactivate += pgmoved; if (buffer_heads_over_limit) { spin_unlock_irq(&zone->lru_lock); @@ -885,14 +886,14 @@ force_reclaim_mapped: list_move(&page->lru, &zone->active_list); pgmoved++; if (!pagevec_add(&pvec, page)) { - zone->nr_active += pgmoved; + __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); pgmoved = 0; spin_unlock_irq(&zone->lru_lock); __pagevec_release(&pvec); spin_lock_irq(&zone->lru_lock); } } - zone->nr_active += pgmoved; + __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); __count_zone_vm_events(PGREFILL, zone, pgscanned); __count_vm_events(PGDEACTIVATE, pgdeactivate); @@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone, * Add one to `nr_to_scan' just to make sure that the kernel will * slowly sift through the active list. */ - zone->nr_scan_active += (zone->nr_active >> priority) + 1; + zone->nr_scan_active += + (zone_page_state(zone, NR_ACTIVE) >> priority) + 1; nr_active = zone->nr_scan_active; if (nr_active >= sc->swap_cluster_max) zone->nr_scan_active = 0; else nr_active = 0; - zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1; + zone->nr_scan_inactive += + (zone_page_state(zone, NR_INACTIVE) >> priority) + 1; nr_inactive = zone->nr_scan_inactive; if (nr_inactive >= sc->swap_cluster_max) zone->nr_scan_inactive = 0; @@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - lru_pages += zone->nr_active + zone->nr_inactive; + lru_pages += zone_page_state(zone, NR_ACTIVE) + + zone_page_state(zone, NR_INACTIVE); } for (priority = DEF_PRIORITY; priority >= 0; priority--) { @@ -1182,7 +1186,8 @@ loop_again: for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; - lru_pages += zone->nr_active + zone->nr_inactive; + lru_pages += zone_page_state(zone, NR_ACTIVE) + + zone_page_state(zone, NR_INACTIVE); } /* @@ -1219,8 +1224,9 @@ loop_again: if (zone->all_unreclaimable) continue; if (nr_slab == 0 && zone->pages_scanned >= - (zone->nr_active + zone->nr_inactive) * 6) - zone->all_unreclaimable = 1; + (zone_page_state(zone, NR_ACTIVE) + + zone_page_state(zone, NR_INACTIVE)) * 6) + zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage @@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, /* For pass = 0 we don't shrink the active list */ if (pass > 0) { - zone->nr_scan_active += (zone->nr_active >> prio) + 1; + zone->nr_scan_active += + (zone_page_state(zone, NR_ACTIVE) >> prio) + 1; if (zone->nr_scan_active >= nr_pages || pass > 3) { zone->nr_scan_active = 0; - nr_to_scan = min(nr_pages, zone->nr_active); + nr_to_scan = min(nr_pages, + zone_page_state(zone, NR_ACTIVE)); shrink_active_list(nr_to_scan, zone, sc, prio); } } - zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1; + zone->nr_scan_inactive += + (zone_page_state(zone, NR_INACTIVE) >> prio) + 1; if (zone->nr_scan_inactive >= nr_pages || pass > 3) { zone->nr_scan_inactive = 0; - nr_to_scan = min(nr_pages, zone->nr_inactive); + nr_to_scan = min(nr_pages, + zone_page_state(zone, NR_INACTIVE)); ret += shrink_inactive_list(nr_to_scan, zone, sc); if (ret >= nr_pages) return ret; @@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, static unsigned long count_lru_pages(void) { - struct zone *zone; - unsigned long ret = 0; - - for_each_zone(zone) - ret += zone->nr_active + zone->nr_inactive; - return ret; + return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE); } /* diff --git a/mm/vmstat.c b/mm/vmstat.c index dc005a0c96ae..6c488d6ac425 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -13,39 +13,6 @@ #include <linux/module.h> #include <linux/cpu.h> -void __get_zone_counts(unsigned long *active, unsigned long *inactive, - unsigned long *free, struct pglist_data *pgdat) -{ - struct zone *zones = pgdat->node_zones; - int i; - - *active = 0; - *inactive = 0; - *free = 0; - for (i = 0; i < MAX_NR_ZONES; i++) { - *active += zones[i].nr_active; - *inactive += zones[i].nr_inactive; - *free += zones[i].free_pages; - } -} - -void get_zone_counts(unsigned long *active, - unsigned long *inactive, unsigned long *free) -{ - struct pglist_data *pgdat; - - *active = 0; - *inactive = 0; - *free = 0; - for_each_online_pgdat(pgdat) { - unsigned long l, m, n; - __get_zone_counts(&l, &m, &n, pgdat); - *active += l; - *inactive += m; - *free += n; - } -} - #ifdef CONFIG_VM_EVENT_COUNTERS DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; EXPORT_PER_CPU_SYMBOL(vm_event_states); @@ -239,7 +206,7 @@ EXPORT_SYMBOL(mod_zone_page_state); * in between and therefore the atomicity vs. interrupt cannot be exploited * in a useful way here. */ -static void __inc_zone_state(struct zone *zone, enum zone_stat_item item) +void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); s8 *p = pcp->vm_stat_diff + item; @@ -260,9 +227,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item) } EXPORT_SYMBOL(__inc_zone_page_state); -void __dec_zone_page_state(struct page *page, enum zone_stat_item item) +void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct zone *zone = page_zone(page); struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); s8 *p = pcp->vm_stat_diff + item; @@ -275,6 +241,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item) *p = overstep; } } + +void __dec_zone_page_state(struct page *page, enum zone_stat_item item) +{ + __dec_zone_state(page_zone(page), item); +} EXPORT_SYMBOL(__dec_zone_page_state); void inc_zone_state(struct zone *zone, enum zone_stat_item item) @@ -437,6 +408,12 @@ const struct seq_operations fragmentation_op = { .show = frag_show, }; +#ifdef CONFIG_ZONE_DMA +#define TEXT_FOR_DMA(xx) xx "_dma", +#else +#define TEXT_FOR_DMA(xx) +#endif + #ifdef CONFIG_ZONE_DMA32 #define TEXT_FOR_DMA32(xx) xx "_dma32", #else @@ -449,19 +426,22 @@ const struct seq_operations fragmentation_op = { #define TEXT_FOR_HIGHMEM(xx) #endif -#define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ +#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ TEXT_FOR_HIGHMEM(xx) static const char * const vmstat_text[] = { /* Zoned VM counters */ + "nr_free_pages", + "nr_active", + "nr_inactive", "nr_anon_pages", "nr_mapped", "nr_file_pages", + "nr_dirty", + "nr_writeback", "nr_slab_reclaimable", "nr_slab_unreclaimable", "nr_page_table_pages", - "nr_dirty", - "nr_writeback", "nr_unstable", "nr_bounce", "nr_vmscan_write", @@ -529,17 +509,13 @@ static int zoneinfo_show(struct seq_file *m, void *arg) "\n min %lu" "\n low %lu" "\n high %lu" - "\n active %lu" - "\n inactive %lu" "\n scanned %lu (a: %lu i: %lu)" "\n spanned %lu" "\n present %lu", - zone->free_pages, + zone_page_state(zone, NR_FREE_PAGES), zone->pages_min, zone->pages_low, zone->pages_high, - zone->nr_active, - zone->nr_inactive, zone->pages_scanned, zone->nr_scan_active, zone->nr_scan_inactive, zone->spanned_pages, @@ -563,12 +539,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg) pageset = zone_pcp(zone, i); for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { - if (pageset->pcp[j].count) - break; - } - if (j == ARRAY_SIZE(pageset->pcp)) - continue; - for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { seq_printf(m, "\n cpu: %i pcp: %i" "\n count: %i" |