diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 2 | ||||
-rw-r--r-- | mm/filemap.c | 3 | ||||
-rw-r--r-- | mm/hugetlb.c | 4 | ||||
-rw-r--r-- | mm/memory.c | 5 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 50 | ||||
-rw-r--r-- | mm/mempolicy.c | 5 | ||||
-rw-r--r-- | mm/mempool.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 5 | ||||
-rw-r--r-- | mm/mmap.c | 14 | ||||
-rw-r--r-- | mm/mprotect.c | 6 | ||||
-rw-r--r-- | mm/mremap.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 3 | ||||
-rw-r--r-- | mm/oom_kill.c | 17 | ||||
-rw-r--r-- | mm/page-writeback.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 8 | ||||
-rw-r--r-- | mm/prio_tree.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 37 | ||||
-rw-r--r-- | mm/slab.c | 313 | ||||
-rw-r--r-- | mm/slub.c | 118 | ||||
-rw-r--r-- | mm/swap.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 6 | ||||
-rw-r--r-- | mm/vmscan.c | 4 |
22 files changed, 389 insertions, 223 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index b1f03b0eb7f1..c070ec0c15bf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -37,7 +37,7 @@ config DISCONTIGMEM_MANUAL in their physical address spaces, and this option provides more efficient handling of these holes. However, the vast majority of hardware has quite flat address spaces, and - can have degraded performance from extra overhead that + can have degraded performance from the extra overhead that this option imposes. Many NUMA configurations will have this as the only option. diff --git a/mm/filemap.c b/mm/filemap.c index 79f24a969cb4..5209e47b7fe3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -25,6 +25,7 @@ #include <linux/uio.h> #include <linux/hash.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/security.h> @@ -840,7 +841,7 @@ static void shrink_readahead_size_eio(struct file *filp, /** * do_generic_mapping_read - generic file read routine * @mapping: address_space to be read - * @_ra: file's readahead state + * @ra: file's readahead state * @filp: the file to read * @ppos: current file position * @desc: read_descriptor diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ae2959bb59cb..8b809ecefa39 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1017,10 +1017,10 @@ static long region_chg(struct list_head *head, long f, long t) /* If we are below the current region then a new region is required. * Subtle, allocate a new region at the position but make it zero - * size such that we can guarentee to record the reservation. */ + * size such that we can guarantee to record the reservation. */ if (&rg->link == head || t < rg->from) { nrg = kmalloc(sizeof(*nrg), GFP_KERNEL); - if (nrg == 0) + if (!nrg) return -ENOMEM; nrg->from = f; nrg->to = f; diff --git a/mm/memory.c b/mm/memory.c index bd16dcaeefb8..eefd5b68bc42 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -259,9 +259,6 @@ void free_pgd_range(struct mmu_gather **tlb, continue; free_pud_range(*tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); - - if (!(*tlb)->fullmm) - flush_tlb_pgtables((*tlb)->mm, start, end); } void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, @@ -2716,7 +2713,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in return 0; down_read(&mm->mmap_sem); - /* ignore errors, just check how much was sucessfully transfered */ + /* ignore errors, just check how much was successfully transferred */ while (len) { int bytes, ret, offset; void *maddr; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 091b9c6c2529..3a47871a29d9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -121,7 +121,7 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn, err = __add_section(zone, i << PFN_SECTION_SHIFT); /* - * EEXIST is finally dealed with by ioresource collision + * EEXIST is finally dealt with by ioresource collision * check. see add_memory() => register_memory_resource() * Warning will be printed if there is collision. */ @@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) unsigned long onlined_pages = 0; struct zone *zone; int need_zonelists_rebuild = 0; + int nid; + int ret; + struct memory_notify arg; + + arg.start_pfn = pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + + nid = page_to_nid(pfn_to_page(pfn)); + if (node_present_pages(nid) == 0) + arg.status_change_nid = nid; + ret = memory_notify(MEM_GOING_ONLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) { + memory_notify(MEM_CANCEL_ONLINE, &arg); + return ret; + } /* * This doesn't need a lock to do pfn_to_page(). * The section can't be removed here because of the @@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + if (onlined_pages) + memory_notify(MEM_ONLINE, &arg); + return 0; } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ @@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages, expire; long offlined_pages; - int ret, drain, retry_max; + int ret, drain, retry_max, node; struct zone *zone; + struct memory_notify arg; BUG_ON(start_pfn >= end_pfn); /* at least, alignment against pageblock is necessary */ @@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn, we assume this for now. .*/ if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + + zone = page_zone(pfn_to_page(start_pfn)); + node = zone_to_nid(zone); + nr_pages = end_pfn - start_pfn; + /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) return ret; - nr_pages = end_pfn - start_pfn; + + arg.start_pfn = start_pfn; + arg.nr_pages = nr_pages; + arg.status_change_nid = -1; + if (nr_pages >= node_present_pages(node)) + arg.status_change_nid = node; + + ret = memory_notify(MEM_GOING_OFFLINE, &arg); + ret = notifier_to_errno(ret); + if (ret) + goto failed_removal; + pfn = start_pfn; expire = jiffies + timeout; drain = 0; @@ -539,20 +577,24 @@ repeat: /* reset pagetype flags */ start_isolate_page_range(start_pfn, end_pfn); /* removal success */ - zone = page_zone(pfn_to_page(start_pfn)); zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; num_physpages -= offlined_pages; + vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); + + memory_notify(MEM_OFFLINE, &arg); return 0; failed_removal: printk(KERN_INFO "memory offlining %lx to %lx failed\n", start_pfn, end_pfn); + memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); + return ret; } #else diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 568152ae6caf..c1592a94582f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -78,6 +78,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/module.h> +#include <linux/nsproxy.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compat.h> @@ -940,7 +941,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, /* Find the mm_struct */ read_lock(&tasklist_lock); - task = pid ? find_task_by_pid(pid) : current; + task = pid ? find_task_by_vpid(pid) : current; if (!task) { read_unlock(&tasklist_lock); return -ESRCH; @@ -1388,7 +1389,6 @@ EXPORT_SYMBOL(alloc_pages_current); * keeps mempolicies cpuset relative after its cpuset moves. See * further kernel/cpuset.c update_nodemask(). */ -void *cpuset_being_rebound; /* Slow path of a mempolicy copy */ struct mempolicy *__mpol_copy(struct mempolicy *old) @@ -2019,4 +2019,3 @@ out: m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; return 0; } - diff --git a/mm/mempool.c b/mm/mempool.c index 02d5ec3feabc..a46eb1b4bb66 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -299,7 +299,7 @@ EXPORT_SYMBOL(mempool_free_slab); /* * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory - * specfied by pool_data + * specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { diff --git a/mm/migrate.c b/mm/migrate.c index 06d0877a66ef..6a207e8d17ea 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -19,6 +19,7 @@ #include <linux/pagemap.h> #include <linux/buffer_head.h> #include <linux/mm_inline.h> +#include <linux/nsproxy.h> #include <linux/pagevec.h> #include <linux/rmap.h> #include <linux/topology.h> @@ -705,7 +706,7 @@ move_newpage: * The function returns after 10 attempts or if no pages * are movable anymore because to has become empty * or no retryable pages exist anymore. All pages will be - * retruned to the LRU or freed. + * returned to the LRU or freed. * * Return: Number of pages not migrated or error code. */ @@ -924,7 +925,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, /* Find the mm_struct */ read_lock(&tasklist_lock); - task = pid ? find_task_by_pid(pid) : current; + task = pid ? find_task_by_vpid(pid) : current; if (!task) { read_unlock(&tasklist_lock); return -ESRCH; diff --git a/mm/mmap.c b/mm/mmap.c index 4275e81e25ba..facc1a75bd4f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1048,8 +1048,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma) /* The open routine did something to the protections already? */ if (pgprot_val(vma->vm_page_prot) != - pgprot_val(protection_map[vm_flags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)])) + pgprot_val(vm_get_page_prot(vm_flags))) return 0; /* Specialty mapping? */ @@ -1130,8 +1129,7 @@ munmap_back: vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; - vma->vm_page_prot = protection_map[vm_flags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; if (file) { @@ -1173,8 +1171,7 @@ munmap_back: vm_flags = vma->vm_flags; if (vma_wants_writenotify(vma)) - vma->vm_page_prot = - protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); if (!file || !vma_merge(mm, prev, addr, vma->vm_end, vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { @@ -2002,8 +1999,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) vma->vm_end = addr + len; vma->vm_pgoff = pgoff; vma->vm_flags = flags; - vma->vm_page_prot = protection_map[flags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + vma->vm_page_prot = vm_get_page_prot(flags); vma_link(mm, vma, prev, rb_link, rb_parent); out: mm->total_vm += len >> PAGE_SHIFT; @@ -2209,7 +2205,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_end = addr + len; vma->vm_flags = vm_flags | mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; diff --git a/mm/mprotect.c b/mm/mprotect.c index 1d4d69790e59..4de546899dc1 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -192,11 +192,9 @@ success: * held in write mode. */ vma->vm_flags = newflags; - vma->vm_page_prot = protection_map[newflags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; + vma->vm_page_prot = vm_get_page_prot(newflags); if (vma_wants_writenotify(vma)) { - vma->vm_page_prot = protection_map[newflags & - (VM_READ|VM_WRITE|VM_EXEC)]; + vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED); dirty_accountable = 1; } diff --git a/mm/mremap.c b/mm/mremap.c index 8ea5c2412c6e..08e3c7f2bd15 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -291,7 +291,7 @@ unsigned long do_mremap(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); + ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); if (ret) goto out; @@ -399,7 +399,7 @@ unsigned long do_mremap(unsigned long addr, goto out; } - ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); + ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); if (ret) goto out; } diff --git a/mm/nommu.c b/mm/nommu.c index 42fb84e9e815..8f09333f78e1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -175,7 +175,8 @@ EXPORT_SYMBOL(vfree); void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { /* - * kmalloc doesn't like __GFP_HIGHMEM for some reason + * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc() + * returns only a logical address. */ return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index a64decb5b13f..91a081a82f55 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) if (!p->mm) continue; /* skip the init task */ - if (is_init(p)) + if (is_global_init(p)) continue; /* @@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) */ static void __oom_kill_task(struct task_struct *p, int verbose) { - if (is_init(p)) { + if (is_global_init(p)) { WARN_ON(1); printk(KERN_WARNING "tried to kill init!\n"); return; @@ -278,7 +278,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose) } if (verbose) - printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm); + printk(KERN_ERR "Killed process %d (%s)\n", + task_pid_nr(p), p->comm); /* * We give our sacrificial lamb high priority and access to @@ -326,7 +327,7 @@ static int oom_kill_task(struct task_struct *p) * to memory reserves though, otherwise we might deplete all memory. */ do_each_thread(g, q) { - if (q->mm == mm && q->tgid != p->tgid) + if (q->mm == mm && !same_thread_group(q, p)) force_sig(SIGKILL, q); } while_each_thread(g, q); @@ -337,7 +338,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned long points, const char *message) { struct task_struct *c; - struct list_head *tsk; if (printk_ratelimit()) { printk(KERN_WARNING "%s invoked oom-killer: " @@ -357,11 +357,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, } printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n", - message, p->pid, p->comm, points); + message, task_pid_nr(p), p->comm, points); /* Try to kill a child first */ - list_for_each(tsk, &p->children) { - c = list_entry(tsk, struct task_struct, sibling); + list_for_each_entry(c, &p->children, sibling) { if (c->mm == p->mm) continue; if (!oom_kill_task(c)) @@ -497,7 +496,7 @@ retry: panic("Out of memory and no killable processes...\n"); } - if (oom_kill_process(p, points, gfp_mask, order, + if (oom_kill_process(p, gfp_mask, order, points, "Out of memory")) goto retry; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7845462064f4..838a5e31394c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -989,7 +989,7 @@ int __set_page_dirty_no_writeback(struct page *page) * mapping is pinned by the vma's ->vm_file reference. * * We take care to handle the case where the page was truncated from the - * mapping by re-checking page_mapping() insode tree_lock. + * mapping by re-checking page_mapping() inside tree_lock. */ int __set_page_dirty_nobuffers(struct page *page) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 43f757fcf30f..da69d833e067 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -123,7 +123,7 @@ static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP /* - * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct + * MAX_ACTIVE_REGIONS determines the maximum number of distinct * ranges of memory (RAM) that may be registered with add_active_range(). * Ranges passed to add_active_range() will be merged if possible * so the number of times add_active_range() can be called is @@ -1260,7 +1260,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, * skip over zones that are not allowed by the cpuset, or that have * been recently (in last second) found to be nearly full. See further * comments in mmzone.h. Reduces cache footprint of zonelist scans - * that have to skip over alot of full or unallowed zones. + * that have to skip over a lot of full or unallowed zones. * * If the zonelist cache is present in the passed in zonelist, then * returns a pointer to the allowed node mask (either the current @@ -2358,7 +2358,7 @@ void build_all_zonelists(void) __build_all_zonelists(NULL); cpuset_init_current_mems_allowed(); } else { - /* we have to stop all cpus to guaranntee there is no user + /* we have to stop all cpus to guarantee there is no user of zonelist */ stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); /* cpuset refresh routine should be here */ @@ -2864,7 +2864,7 @@ static int __meminit first_active_region_index_in_nid(int nid) /* * Basic iterator support. Return the next active range of PFNs for a node - * Note: nid == MAX_NUMNODES returns next region regardles of node + * Note: nid == MAX_NUMNODES returns next region regardless of node */ static int __meminit next_active_region_index_in_nid(int index, int nid) { diff --git a/mm/prio_tree.c b/mm/prio_tree.c index b4e76c25f953..603ae98d9694 100644 --- a/mm/prio_tree.c +++ b/mm/prio_tree.c @@ -34,7 +34,7 @@ * Radix priority search tree for address_space->i_mmap * * For each vma that map a unique set of file pages i.e., unique [radix_index, - * heap_index] value, we have a corresponing priority search tree node. If + * heap_index] value, we have a corresponding priority search tree node. If * multiple vmas have identical [radix_index, heap_index] value, then one of * them is used as a tree node and others are stored in a vm_set list. The tree * node points to the first vma (head) of the list using vm_set.head. diff --git a/mm/shmem.c b/mm/shmem.c index 289dbb0a6fd6..404e53bb2127 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2020,33 +2020,25 @@ static int shmem_match(struct inode *ino, void *vfh) return ino->i_ino == inum && fh[0] == ino->i_generation; } -static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) +static struct dentry *shmem_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) { - struct dentry *de = NULL; struct inode *inode; - __u32 *fh = vfh; - __u64 inum = fh[2]; - inum = (inum << 32) | fh[1]; + struct dentry *dentry = NULL; + u64 inum = fid->raw[2]; + inum = (inum << 32) | fid->raw[1]; + + if (fh_len < 3) + return NULL; - inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); + inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), + shmem_match, fid->raw); if (inode) { - de = d_find_alias(inode); + dentry = d_find_alias(inode); iput(inode); } - return de? de: ERR_PTR(-ESTALE); -} - -static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, - int len, int type, - int (*acceptable)(void *context, struct dentry *de), - void *context) -{ - if (len < 3) - return ERR_PTR(-ESTALE); - - return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, - context); + return dentry; } static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, @@ -2079,11 +2071,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, return 1; } -static struct export_operations shmem_export_ops = { +static const struct export_operations shmem_export_ops = { .get_parent = shmem_get_parent, - .get_dentry = shmem_get_dentry, .encode_fh = shmem_encode_fh, - .decode_fh = shmem_decode_fh, + .fh_to_dentry = shmem_fh_to_dentry, }; static int shmem_parse_options(char *options, int *mode, uid_t *uid, diff --git a/mm/slab.c b/mm/slab.c index 3ce9bc024d67..cfa6be4e378e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -26,7 +26,7 @@ * initialized objects. * * This means, that your constructor is used only for newly allocated - * slabs and you must pass objects with the same intializations to + * slabs and you must pass objects with the same initializations to * kmem_cache_free. * * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, @@ -1156,105 +1156,187 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } #endif -static int __cpuinit cpuup_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit cpuup_canceled(long cpu) +{ + struct kmem_cache *cachep; + struct kmem_list3 *l3 = NULL; + int node = cpu_to_node(cpu); + + list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + struct array_cache *shared; + struct array_cache **alien; + cpumask_t mask; + + mask = node_to_cpumask(node); + /* cpu is dead; no one can alloc from it. */ + nc = cachep->array[cpu]; + cachep->array[cpu] = NULL; + l3 = cachep->nodelists[node]; + + if (!l3) + goto free_array_cache; + + spin_lock_irq(&l3->list_lock); + + /* Free limit for this kmem_list3 */ + l3->free_limit -= cachep->batchcount; + if (nc) + free_block(cachep, nc->entry, nc->avail, node); + + if (!cpus_empty(mask)) { + spin_unlock_irq(&l3->list_lock); + goto free_array_cache; + } + + shared = l3->shared; + if (shared) { + free_block(cachep, shared->entry, + shared->avail, node); + l3->shared = NULL; + } + + alien = l3->alien; + l3->alien = NULL; + + spin_unlock_irq(&l3->list_lock); + + kfree(shared); + if (alien) { + drain_alien_cache(cachep, alien); + free_alien_cache(alien); + } +free_array_cache: + kfree(nc); + } + /* + * In the previous loop, all the objects were freed to + * the respective cache's slabs, now we can go ahead and + * shrink each nodelist to its limit. + */ + list_for_each_entry(cachep, &cache_chain, next) { + l3 = cachep->nodelists[node]; + if (!l3) + continue; + drain_freelist(cachep, l3, l3->free_objects); + } +} + +static int __cpuinit cpuup_prepare(long cpu) { - long cpu = (long)hcpu; struct kmem_cache *cachep; struct kmem_list3 *l3 = NULL; int node = cpu_to_node(cpu); const int memsize = sizeof(struct kmem_list3); - switch (action) { - case CPU_LOCK_ACQUIRE: - mutex_lock(&cache_chain_mutex); - break; - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: + /* + * We need to do this right in the beginning since + * alloc_arraycache's are going to use this list. + * kmalloc_node allows us to add the slab to the right + * kmem_list3 and not this cpu's kmem_list3 + */ + + list_for_each_entry(cachep, &cache_chain, next) { /* - * We need to do this right in the beginning since - * alloc_arraycache's are going to use this list. - * kmalloc_node allows us to add the slab to the right - * kmem_list3 and not this cpu's kmem_list3 + * Set up the size64 kmemlist for cpu before we can + * begin anything. Make sure some other cpu on this + * node has not already allocated this */ + if (!cachep->nodelists[node]) { + l3 = kmalloc_node(memsize, GFP_KERNEL, node); + if (!l3) + goto bad; + kmem_list3_init(l3); + l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - list_for_each_entry(cachep, &cache_chain, next) { /* - * Set up the size64 kmemlist for cpu before we can - * begin anything. Make sure some other cpu on this - * node has not already allocated this + * The l3s don't come and go as CPUs come and + * go. cache_chain_mutex is sufficient + * protection here. */ - if (!cachep->nodelists[node]) { - l3 = kmalloc_node(memsize, GFP_KERNEL, node); - if (!l3) - goto bad; - kmem_list3_init(l3); - l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + - ((unsigned long)cachep) % REAPTIMEOUT_LIST3; - - /* - * The l3s don't come and go as CPUs come and - * go. cache_chain_mutex is sufficient - * protection here. - */ - cachep->nodelists[node] = l3; - } - - spin_lock_irq(&cachep->nodelists[node]->list_lock); - cachep->nodelists[node]->free_limit = - (1 + nr_cpus_node(node)) * - cachep->batchcount + cachep->num; - spin_unlock_irq(&cachep->nodelists[node]->list_lock); + cachep->nodelists[node] = l3; } - /* - * Now we can go ahead with allocating the shared arrays and - * array caches - */ - list_for_each_entry(cachep, &cache_chain, next) { - struct array_cache *nc; - struct array_cache *shared = NULL; - struct array_cache **alien = NULL; - - nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); - if (!nc) + spin_lock_irq(&cachep->nodelists[node]->list_lock); + cachep->nodelists[node]->free_limit = + (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; + spin_unlock_irq(&cachep->nodelists[node]->list_lock); + } + + /* + * Now we can go ahead with allocating the shared arrays and + * array caches + */ + list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + struct array_cache *shared = NULL; + struct array_cache **alien = NULL; + + nc = alloc_arraycache(node, cachep->limit, + cachep->batchcount); + if (!nc) + goto bad; + if (cachep->shared) { + shared = alloc_arraycache(node, + cachep->shared * cachep->batchcount, + 0xbaadf00d); + if (!shared) { + kfree(nc); goto bad; - if (cachep->shared) { - shared = alloc_arraycache(node, - cachep->shared * cachep->batchcount, - 0xbaadf00d); - if (!shared) - goto bad; } - if (use_alien_caches) { - alien = alloc_alien_cache(node, cachep->limit); - if (!alien) - goto bad; - } - cachep->array[cpu] = nc; - l3 = cachep->nodelists[node]; - BUG_ON(!l3); - - spin_lock_irq(&l3->list_lock); - if (!l3->shared) { - /* - * We are serialised from CPU_DEAD or - * CPU_UP_CANCELLED by the cpucontrol lock - */ - l3->shared = shared; - shared = NULL; + } + if (use_alien_caches) { + alien = alloc_alien_cache(node, cachep->limit); + if (!alien) { + kfree(shared); + kfree(nc); + goto bad; } + } + cachep->array[cpu] = nc; + l3 = cachep->nodelists[node]; + BUG_ON(!l3); + + spin_lock_irq(&l3->list_lock); + if (!l3->shared) { + /* + * We are serialised from CPU_DEAD or + * CPU_UP_CANCELLED by the cpucontrol lock + */ + l3->shared = shared; + shared = NULL; + } #ifdef CONFIG_NUMA - if (!l3->alien) { - l3->alien = alien; - alien = NULL; - } -#endif - spin_unlock_irq(&l3->list_lock); - kfree(shared); - free_alien_cache(alien); + if (!l3->alien) { + l3->alien = alien; + alien = NULL; } +#endif + spin_unlock_irq(&l3->list_lock); + kfree(shared); + free_alien_cache(alien); + } + return 0; +bad: + cpuup_canceled(cpu); + return -ENOMEM; +} + +static int __cpuinit cpuup_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + int err = 0; + + switch (action) { + case CPU_LOCK_ACQUIRE: + mutex_lock(&cache_chain_mutex); + break; + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpuup_prepare(cpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -1287,76 +1369,17 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, * structure is usually allocated from kmem_cache_create() and * gets destroyed at kmem_cache_destroy(). */ - /* fall thru */ + /* fall through */ #endif case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - list_for_each_entry(cachep, &cache_chain, next) { - struct array_cache *nc; - struct array_cache *shared; - struct array_cache **alien; - cpumask_t mask; - - mask = node_to_cpumask(node); - /* cpu is dead; no one can alloc from it. */ - nc = cachep->array[cpu]; - cachep->array[cpu] = NULL; - l3 = cachep->nodelists[node]; - - if (!l3) - goto free_array_cache; - - spin_lock_irq(&l3->list_lock); - - /* Free limit for this kmem_list3 */ - l3->free_limit -= cachep->batchcount; - if (nc) - free_block(cachep, nc->entry, nc->avail, node); - - if (!cpus_empty(mask)) { - spin_unlock_irq(&l3->list_lock); - goto free_array_cache; - } - - shared = l3->shared; - if (shared) { - free_block(cachep, shared->entry, - shared->avail, node); - l3->shared = NULL; - } - - alien = l3->alien; - l3->alien = NULL; - - spin_unlock_irq(&l3->list_lock); - - kfree(shared); - if (alien) { - drain_alien_cache(cachep, alien); - free_alien_cache(alien); - } -free_array_cache: - kfree(nc); - } - /* - * In the previous loop, all the objects were freed to - * the respective cache's slabs, now we can go ahead and - * shrink each nodelist to its limit. - */ - list_for_each_entry(cachep, &cache_chain, next) { - l3 = cachep->nodelists[node]; - if (!l3) - continue; - drain_freelist(cachep, l3, l3->free_objects); - } + cpuup_canceled(cpu); break; case CPU_LOCK_RELEASE: mutex_unlock(&cache_chain_mutex); break; } - return NOTIFY_OK; -bad: - return NOTIFY_BAD; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block __cpuinitdata cpucache_notifier = { @@ -3783,7 +3806,7 @@ const char *kmem_cache_name(struct kmem_cache *cachep) EXPORT_SYMBOL_GPL(kmem_cache_name); /* - * This initializes kmem_list3 or resizes varioius caches for all nodes. + * This initializes kmem_list3 or resizes various caches for all nodes. */ static int alloc_kmemlist(struct kmem_cache *cachep) { diff --git a/mm/slub.c b/mm/slub.c index e29a42988c78..aac1dd3c657d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -20,6 +20,7 @@ #include <linux/mempolicy.h> #include <linux/ctype.h> #include <linux/kallsyms.h> +#include <linux/memory.h> /* * Lock order: @@ -2694,6 +2695,121 @@ int kmem_cache_shrink(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_shrink); +#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) +static int slab_mem_going_offline_callback(void *arg) +{ + struct kmem_cache *s; + + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) + kmem_cache_shrink(s); + up_read(&slub_lock); + + return 0; +} + +static void slab_mem_offline_callback(void *arg) +{ + struct kmem_cache_node *n; + struct kmem_cache *s; + struct memory_notify *marg = arg; + int offline_node; + + offline_node = marg->status_change_nid; + + /* + * If the node still has available memory. we need kmem_cache_node + * for it yet. + */ + if (offline_node < 0) + return; + + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) { + n = get_node(s, offline_node); + if (n) { + /* + * if n->nr_slabs > 0, slabs still exist on the node + * that is going down. We were unable to free them, + * and offline_pages() function shoudn't call this + * callback. So, we must fail. + */ + BUG_ON(atomic_read(&n->nr_slabs)); + + s->node[offline_node] = NULL; + kmem_cache_free(kmalloc_caches, n); + } + } + up_read(&slub_lock); +} + +static int slab_mem_going_online_callback(void *arg) +{ + struct kmem_cache_node *n; + struct kmem_cache *s; + struct memory_notify *marg = arg; + int nid = marg->status_change_nid; + int ret = 0; + + /* + * If the node's memory is already available, then kmem_cache_node is + * already created. Nothing to do. + */ + if (nid < 0) + return 0; + + /* + * We are bringing a node online. No memory is availabe yet. We must + * allocate a kmem_cache_node structure in order to bring the node + * online. + */ + down_read(&slub_lock); + list_for_each_entry(s, &slab_caches, list) { + /* + * XXX: kmem_cache_alloc_node will fallback to other nodes + * since memory is not yet available from the node that + * is brought up. + */ + n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto out; + } + init_kmem_cache_node(n); + s->node[nid] = n; + } +out: + up_read(&slub_lock); + return ret; +} + +static int slab_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + int ret = 0; + + switch (action) { + case MEM_GOING_ONLINE: + ret = slab_mem_going_online_callback(arg); + break; + case MEM_GOING_OFFLINE: + ret = slab_mem_going_offline_callback(arg); + break; + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + slab_mem_offline_callback(arg); + break; + case MEM_ONLINE: + case MEM_CANCEL_OFFLINE: + break; + } + + ret = notifier_from_errno(ret); + return ret; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + /******************************************************************** * Basic setup of slabs *******************************************************************/ @@ -2715,6 +2831,8 @@ void __init kmem_cache_init(void) sizeof(struct kmem_cache_node), GFP_KERNEL); kmalloc_caches[0].refcount = -1; caches++; + + hotplug_memory_notifier(slab_memory_callback, 1); #endif /* Able to allocate the per node structures */ diff --git a/mm/swap.c b/mm/swap.c index a65eff8a517a..9ac88323d237 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -5,7 +5,7 @@ */ /* - * This file contains the default values for the opereation of the + * This file contains the default values for the operation of the * Linux VM subsystem. Fine-tuning documentation can be found in * Documentation/sysctl/vm.txt. * Started 18.12.91 diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2e01af365848..af77e171e339 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -247,7 +247,7 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, EXPORT_SYMBOL_GPL(__get_vm_area); /** - * get_vm_area - reserve a contingous kernel virtual area + * get_vm_area - reserve a contiguous kernel virtual area * @size: size of the area * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC * @@ -303,7 +303,7 @@ found: } /** - * remove_vm_area - find and remove a contingous kernel virtual area + * remove_vm_area - find and remove a continuous kernel virtual area * @addr: base address * * Search for the kernel VM area starting at @addr, and remove it. @@ -364,7 +364,7 @@ static void __vunmap(void *addr, int deallocate_pages) * vfree - release memory allocated by vmalloc() * @addr: memory base address * - * Free the virtually contiguous memory area starting at @addr, as + * Free the virtually continuous memory area starting at @addr, as * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is * NULL, no operation is performed. * diff --git a/mm/vmscan.c b/mm/vmscan.c index e1471385d001..e5a9597e3bbc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -141,7 +141,7 @@ EXPORT_SYMBOL(unregister_shrinker); * percentages of the lru and ageable caches. This should balance the seeks * generated by these structures. * - * If the vm encounted mapped pages on the LRU it increase the pressure on + * If the vm encountered mapped pages on the LRU it increase the pressure on * slab to avoid swapping. * * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits. @@ -1282,7 +1282,7 @@ out: */ if (priority < 0) priority = 0; - for (i = 0; zones[i] != 0; i++) { + for (i = 0; zones[i] != NULL; i++) { struct zone *zone = zones[i]; if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |