summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/memory.c5
-rw-r--r--mm/memory_hotplug.c50
-rw-r--r--mm/mempolicy.c5
-rw-r--r--mm/mempool.c2
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/mmap.c14
-rw-r--r--mm/mprotect.c6
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nommu.c3
-rw-r--r--mm/oom_kill.c17
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/prio_tree.c2
-rw-r--r--mm/shmem.c37
-rw-r--r--mm/slab.c313
-rw-r--r--mm/slub.c118
-rw-r--r--mm/swap.c2
-rw-r--r--mm/vmalloc.c6
-rw-r--r--mm/vmscan.c4
22 files changed, 389 insertions, 223 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b1f03b0eb7f1..c070ec0c15bf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -37,7 +37,7 @@ config DISCONTIGMEM_MANUAL
in their physical address spaces, and this option provides
more efficient handling of these holes. However, the vast
majority of hardware has quite flat address spaces, and
- can have degraded performance from extra overhead that
+ can have degraded performance from the extra overhead that
this option imposes.
Many NUMA configurations will have this as the only option.
diff --git a/mm/filemap.c b/mm/filemap.c
index 79f24a969cb4..5209e47b7fe3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -25,6 +25,7 @@
#include <linux/uio.h>
#include <linux/hash.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
@@ -840,7 +841,7 @@ static void shrink_readahead_size_eio(struct file *filp,
/**
* do_generic_mapping_read - generic file read routine
* @mapping: address_space to be read
- * @_ra: file's readahead state
+ * @ra: file's readahead state
* @filp: the file to read
* @ppos: current file position
* @desc: read_descriptor
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ae2959bb59cb..8b809ecefa39 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1017,10 +1017,10 @@ static long region_chg(struct list_head *head, long f, long t)
/* If we are below the current region then a new region is required.
* Subtle, allocate a new region at the position but make it zero
- * size such that we can guarentee to record the reservation. */
+ * size such that we can guarantee to record the reservation. */
if (&rg->link == head || t < rg->from) {
nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
- if (nrg == 0)
+ if (!nrg)
return -ENOMEM;
nrg->from = f;
nrg->to = f;
diff --git a/mm/memory.c b/mm/memory.c
index bd16dcaeefb8..eefd5b68bc42 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -259,9 +259,6 @@ void free_pgd_range(struct mmu_gather **tlb,
continue;
free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end);
-
- if (!(*tlb)->fullmm)
- flush_tlb_pgtables((*tlb)->mm, start, end);
}
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
@@ -2716,7 +2713,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
return 0;
down_read(&mm->mmap_sem);
- /* ignore errors, just check how much was sucessfully transfered */
+ /* ignore errors, just check how much was successfully transferred */
while (len) {
int bytes, ret, offset;
void *maddr;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 091b9c6c2529..3a47871a29d9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -121,7 +121,7 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
err = __add_section(zone, i << PFN_SECTION_SHIFT);
/*
- * EEXIST is finally dealed with by ioresource collision
+ * EEXIST is finally dealt with by ioresource collision
* check. see add_memory() => register_memory_resource()
* Warning will be printed if there is collision.
*/
@@ -187,7 +187,24 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
unsigned long onlined_pages = 0;
struct zone *zone;
int need_zonelists_rebuild = 0;
+ int nid;
+ int ret;
+ struct memory_notify arg;
+
+ arg.start_pfn = pfn;
+ arg.nr_pages = nr_pages;
+ arg.status_change_nid = -1;
+
+ nid = page_to_nid(pfn_to_page(pfn));
+ if (node_present_pages(nid) == 0)
+ arg.status_change_nid = nid;
+ ret = memory_notify(MEM_GOING_ONLINE, &arg);
+ ret = notifier_to_errno(ret);
+ if (ret) {
+ memory_notify(MEM_CANCEL_ONLINE, &arg);
+ return ret;
+ }
/*
* This doesn't need a lock to do pfn_to_page().
* The section can't be removed here because of the
@@ -222,6 +239,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
build_all_zonelists();
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
+
+ if (onlined_pages)
+ memory_notify(MEM_ONLINE, &arg);
+
return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
@@ -467,8 +488,9 @@ int offline_pages(unsigned long start_pfn,
{
unsigned long pfn, nr_pages, expire;
long offlined_pages;
- int ret, drain, retry_max;
+ int ret, drain, retry_max, node;
struct zone *zone;
+ struct memory_notify arg;
BUG_ON(start_pfn >= end_pfn);
/* at least, alignment against pageblock is necessary */
@@ -480,11 +502,27 @@ int offline_pages(unsigned long start_pfn,
we assume this for now. .*/
if (!test_pages_in_a_zone(start_pfn, end_pfn))
return -EINVAL;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ node = zone_to_nid(zone);
+ nr_pages = end_pfn - start_pfn;
+
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn);
if (ret)
return ret;
- nr_pages = end_pfn - start_pfn;
+
+ arg.start_pfn = start_pfn;
+ arg.nr_pages = nr_pages;
+ arg.status_change_nid = -1;
+ if (nr_pages >= node_present_pages(node))
+ arg.status_change_nid = node;
+
+ ret = memory_notify(MEM_GOING_OFFLINE, &arg);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto failed_removal;
+
pfn = start_pfn;
expire = jiffies + timeout;
drain = 0;
@@ -539,20 +577,24 @@ repeat:
/* reset pagetype flags */
start_isolate_page_range(start_pfn, end_pfn);
/* removal success */
- zone = page_zone(pfn_to_page(start_pfn));
zone->present_pages -= offlined_pages;
zone->zone_pgdat->node_present_pages -= offlined_pages;
totalram_pages -= offlined_pages;
num_physpages -= offlined_pages;
+
vm_total_pages = nr_free_pagecache_pages();
writeback_set_ratelimit();
+
+ memory_notify(MEM_OFFLINE, &arg);
return 0;
failed_removal:
printk(KERN_INFO "memory offlining %lx to %lx failed\n",
start_pfn, end_pfn);
+ memory_notify(MEM_CANCEL_OFFLINE, &arg);
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn);
+
return ret;
}
#else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 568152ae6caf..c1592a94582f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -78,6 +78,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/module.h>
+#include <linux/nsproxy.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compat.h>
@@ -940,7 +941,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
/* Find the mm_struct */
read_lock(&tasklist_lock);
- task = pid ? find_task_by_pid(pid) : current;
+ task = pid ? find_task_by_vpid(pid) : current;
if (!task) {
read_unlock(&tasklist_lock);
return -ESRCH;
@@ -1388,7 +1389,6 @@ EXPORT_SYMBOL(alloc_pages_current);
* keeps mempolicies cpuset relative after its cpuset moves. See
* further kernel/cpuset.c update_nodemask().
*/
-void *cpuset_being_rebound;
/* Slow path of a mempolicy copy */
struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -2019,4 +2019,3 @@ out:
m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
return 0;
}
-
diff --git a/mm/mempool.c b/mm/mempool.c
index 02d5ec3feabc..a46eb1b4bb66 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -299,7 +299,7 @@ EXPORT_SYMBOL(mempool_free_slab);
/*
* A commonly used alloc and free fn that kmalloc/kfrees the amount of memory
- * specfied by pool_data
+ * specified by pool_data
*/
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
diff --git a/mm/migrate.c b/mm/migrate.c
index 06d0877a66ef..6a207e8d17ea 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -19,6 +19,7 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
+#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/rmap.h>
#include <linux/topology.h>
@@ -705,7 +706,7 @@ move_newpage:
* The function returns after 10 attempts or if no pages
* are movable anymore because to has become empty
* or no retryable pages exist anymore. All pages will be
- * retruned to the LRU or freed.
+ * returned to the LRU or freed.
*
* Return: Number of pages not migrated or error code.
*/
@@ -924,7 +925,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
/* Find the mm_struct */
read_lock(&tasklist_lock);
- task = pid ? find_task_by_pid(pid) : current;
+ task = pid ? find_task_by_vpid(pid) : current;
if (!task) {
read_unlock(&tasklist_lock);
return -ESRCH;
diff --git a/mm/mmap.c b/mm/mmap.c
index 4275e81e25ba..facc1a75bd4f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1048,8 +1048,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
/* The open routine did something to the protections already? */
if (pgprot_val(vma->vm_page_prot) !=
- pgprot_val(protection_map[vm_flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+ pgprot_val(vm_get_page_prot(vm_flags)))
return 0;
/* Specialty mapping? */
@@ -1130,8 +1129,7 @@ munmap_back:
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags;
- vma->vm_page_prot = protection_map[vm_flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
if (file) {
@@ -1173,8 +1171,7 @@ munmap_back:
vm_flags = vma->vm_flags;
if (vma_wants_writenotify(vma))
- vma->vm_page_prot =
- protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+ vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
@@ -2002,8 +1999,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vma->vm_flags = flags;
- vma->vm_page_prot = protection_map[flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ vma->vm_page_prot = vm_get_page_prot(flags);
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
mm->total_vm += len >> PAGE_SHIFT;
@@ -2209,7 +2205,7 @@ int install_special_mapping(struct mm_struct *mm,
vma->vm_end = addr + len;
vma->vm_flags = vm_flags | mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
vma->vm_ops = &special_mapping_vmops;
vma->vm_private_data = pages;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1d4d69790e59..4de546899dc1 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -192,11 +192,9 @@ success:
* held in write mode.
*/
vma->vm_flags = newflags;
- vma->vm_page_prot = protection_map[newflags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+ vma->vm_page_prot = vm_get_page_prot(newflags);
if (vma_wants_writenotify(vma)) {
- vma->vm_page_prot = protection_map[newflags &
- (VM_READ|VM_WRITE|VM_EXEC)];
+ vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
dirty_accountable = 1;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 8ea5c2412c6e..08e3c7f2bd15 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -291,7 +291,7 @@ unsigned long do_mremap(unsigned long addr,
if ((addr <= new_addr) && (addr+old_len) > new_addr)
goto out;
- ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
+ ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
if (ret)
goto out;
@@ -399,7 +399,7 @@ unsigned long do_mremap(unsigned long addr,
goto out;
}
- ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
+ ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
if (ret)
goto out;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 42fb84e9e815..8f09333f78e1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -175,7 +175,8 @@ EXPORT_SYMBOL(vfree);
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
/*
- * kmalloc doesn't like __GFP_HIGHMEM for some reason
+ * You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
+ * returns only a logical address.
*/
return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a64decb5b13f..91a081a82f55 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
if (!p->mm)
continue;
/* skip the init task */
- if (is_init(p))
+ if (is_global_init(p))
continue;
/*
@@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
*/
static void __oom_kill_task(struct task_struct *p, int verbose)
{
- if (is_init(p)) {
+ if (is_global_init(p)) {
WARN_ON(1);
printk(KERN_WARNING "tried to kill init!\n");
return;
@@ -278,7 +278,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
}
if (verbose)
- printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm);
+ printk(KERN_ERR "Killed process %d (%s)\n",
+ task_pid_nr(p), p->comm);
/*
* We give our sacrificial lamb high priority and access to
@@ -326,7 +327,7 @@ static int oom_kill_task(struct task_struct *p)
* to memory reserves though, otherwise we might deplete all memory.
*/
do_each_thread(g, q) {
- if (q->mm == mm && q->tgid != p->tgid)
+ if (q->mm == mm && !same_thread_group(q, p))
force_sig(SIGKILL, q);
} while_each_thread(g, q);
@@ -337,7 +338,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
unsigned long points, const char *message)
{
struct task_struct *c;
- struct list_head *tsk;
if (printk_ratelimit()) {
printk(KERN_WARNING "%s invoked oom-killer: "
@@ -357,11 +357,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
}
printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
- message, p->pid, p->comm, points);
+ message, task_pid_nr(p), p->comm, points);
/* Try to kill a child first */
- list_for_each(tsk, &p->children) {
- c = list_entry(tsk, struct task_struct, sibling);
+ list_for_each_entry(c, &p->children, sibling) {
if (c->mm == p->mm)
continue;
if (!oom_kill_task(c))
@@ -497,7 +496,7 @@ retry:
panic("Out of memory and no killable processes...\n");
}
- if (oom_kill_process(p, points, gfp_mask, order,
+ if (oom_kill_process(p, gfp_mask, order, points,
"Out of memory"))
goto retry;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7845462064f4..838a5e31394c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -989,7 +989,7 @@ int __set_page_dirty_no_writeback(struct page *page)
* mapping is pinned by the vma's ->vm_file reference.
*
* We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() insode tree_lock.
+ * mapping by re-checking page_mapping() inside tree_lock.
*/
int __set_page_dirty_nobuffers(struct page *page)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 43f757fcf30f..da69d833e067 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -123,7 +123,7 @@ static unsigned long __meminitdata dma_reserve;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
/*
- * MAX_ACTIVE_REGIONS determines the maxmimum number of distinct
+ * MAX_ACTIVE_REGIONS determines the maximum number of distinct
* ranges of memory (RAM) that may be registered with add_active_range().
* Ranges passed to add_active_range() will be merged if possible
* so the number of times add_active_range() can be called is
@@ -1260,7 +1260,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
* skip over zones that are not allowed by the cpuset, or that have
* been recently (in last second) found to be nearly full. See further
* comments in mmzone.h. Reduces cache footprint of zonelist scans
- * that have to skip over alot of full or unallowed zones.
+ * that have to skip over a lot of full or unallowed zones.
*
* If the zonelist cache is present in the passed in zonelist, then
* returns a pointer to the allowed node mask (either the current
@@ -2358,7 +2358,7 @@ void build_all_zonelists(void)
__build_all_zonelists(NULL);
cpuset_init_current_mems_allowed();
} else {
- /* we have to stop all cpus to guaranntee there is no user
+ /* we have to stop all cpus to guarantee there is no user
of zonelist */
stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
/* cpuset refresh routine should be here */
@@ -2864,7 +2864,7 @@ static int __meminit first_active_region_index_in_nid(int nid)
/*
* Basic iterator support. Return the next active range of PFNs for a node
- * Note: nid == MAX_NUMNODES returns next region regardles of node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
*/
static int __meminit next_active_region_index_in_nid(int index, int nid)
{
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
index b4e76c25f953..603ae98d9694 100644
--- a/mm/prio_tree.c
+++ b/mm/prio_tree.c
@@ -34,7 +34,7 @@
* Radix priority search tree for address_space->i_mmap
*
* For each vma that map a unique set of file pages i.e., unique [radix_index,
- * heap_index] value, we have a corresponing priority search tree node. If
+ * heap_index] value, we have a corresponding priority search tree node. If
* multiple vmas have identical [radix_index, heap_index] value, then one of
* them is used as a tree node and others are stored in a vm_set list. The tree
* node points to the first vma (head) of the list using vm_set.head.
diff --git a/mm/shmem.c b/mm/shmem.c
index 289dbb0a6fd6..404e53bb2127 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2020,33 +2020,25 @@ static int shmem_match(struct inode *ino, void *vfh)
return ino->i_ino == inum && fh[0] == ino->i_generation;
}
-static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh)
+static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
{
- struct dentry *de = NULL;
struct inode *inode;
- __u32 *fh = vfh;
- __u64 inum = fh[2];
- inum = (inum << 32) | fh[1];
+ struct dentry *dentry = NULL;
+ u64 inum = fid->raw[2];
+ inum = (inum << 32) | fid->raw[1];
+
+ if (fh_len < 3)
+ return NULL;
- inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh);
+ inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]),
+ shmem_match, fid->raw);
if (inode) {
- de = d_find_alias(inode);
+ dentry = d_find_alias(inode);
iput(inode);
}
- return de? de: ERR_PTR(-ESTALE);
-}
-
-static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh,
- int len, int type,
- int (*acceptable)(void *context, struct dentry *de),
- void *context)
-{
- if (len < 3)
- return ERR_PTR(-ESTALE);
-
- return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable,
- context);
+ return dentry;
}
static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
@@ -2079,11 +2071,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
return 1;
}
-static struct export_operations shmem_export_ops = {
+static const struct export_operations shmem_export_ops = {
.get_parent = shmem_get_parent,
- .get_dentry = shmem_get_dentry,
.encode_fh = shmem_encode_fh,
- .decode_fh = shmem_decode_fh,
+ .fh_to_dentry = shmem_fh_to_dentry,
};
static int shmem_parse_options(char *options, int *mode, uid_t *uid,
diff --git a/mm/slab.c b/mm/slab.c
index 3ce9bc024d67..cfa6be4e378e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -26,7 +26,7 @@
* initialized objects.
*
* This means, that your constructor is used only for newly allocated
- * slabs and you must pass objects with the same intializations to
+ * slabs and you must pass objects with the same initializations to
* kmem_cache_free.
*
* Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
@@ -1156,105 +1156,187 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
}
#endif
-static int __cpuinit cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static void __cpuinit cpuup_canceled(long cpu)
+{
+ struct kmem_cache *cachep;
+ struct kmem_list3 *l3 = NULL;
+ int node = cpu_to_node(cpu);
+
+ list_for_each_entry(cachep, &cache_chain, next) {
+ struct array_cache *nc;
+ struct array_cache *shared;
+ struct array_cache **alien;
+ cpumask_t mask;
+
+ mask = node_to_cpumask(node);
+ /* cpu is dead; no one can alloc from it. */
+ nc = cachep->array[cpu];
+ cachep->array[cpu] = NULL;
+ l3 = cachep->nodelists[node];
+
+ if (!l3)
+ goto free_array_cache;
+
+ spin_lock_irq(&l3->list_lock);
+
+ /* Free limit for this kmem_list3 */
+ l3->free_limit -= cachep->batchcount;
+ if (nc)
+ free_block(cachep, nc->entry, nc->avail, node);
+
+ if (!cpus_empty(mask)) {
+ spin_unlock_irq(&l3->list_lock);
+ goto free_array_cache;
+ }
+
+ shared = l3->shared;
+ if (shared) {
+ free_block(cachep, shared->entry,
+ shared->avail, node);
+ l3->shared = NULL;
+ }
+
+ alien = l3->alien;
+ l3->alien = NULL;
+
+ spin_unlock_irq(&l3->list_lock);
+
+ kfree(shared);
+ if (alien) {
+ drain_alien_cache(cachep, alien);
+ free_alien_cache(alien);
+ }
+free_array_cache:
+ kfree(nc);
+ }
+ /*
+ * In the previous loop, all the objects were freed to
+ * the respective cache's slabs, now we can go ahead and
+ * shrink each nodelist to its limit.
+ */
+ list_for_each_entry(cachep, &cache_chain, next) {
+ l3 = cachep->nodelists[node];
+ if (!l3)
+ continue;
+ drain_freelist(cachep, l3, l3->free_objects);
+ }
+}
+
+static int __cpuinit cpuup_prepare(long cpu)
{
- long cpu = (long)hcpu;
struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu);
const int memsize = sizeof(struct kmem_list3);
- switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&cache_chain_mutex);
- break;
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
+ /*
+ * We need to do this right in the beginning since
+ * alloc_arraycache's are going to use this list.
+ * kmalloc_node allows us to add the slab to the right
+ * kmem_list3 and not this cpu's kmem_list3
+ */
+
+ list_for_each_entry(cachep, &cache_chain, next) {
/*
- * We need to do this right in the beginning since
- * alloc_arraycache's are going to use this list.
- * kmalloc_node allows us to add the slab to the right
- * kmem_list3 and not this cpu's kmem_list3
+ * Set up the size64 kmemlist for cpu before we can
+ * begin anything. Make sure some other cpu on this
+ * node has not already allocated this
*/
+ if (!cachep->nodelists[node]) {
+ l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+ if (!l3)
+ goto bad;
+ kmem_list3_init(l3);
+ l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
+ ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
- list_for_each_entry(cachep, &cache_chain, next) {
/*
- * Set up the size64 kmemlist for cpu before we can
- * begin anything. Make sure some other cpu on this
- * node has not already allocated this
+ * The l3s don't come and go as CPUs come and
+ * go. cache_chain_mutex is sufficient
+ * protection here.
*/
- if (!cachep->nodelists[node]) {
- l3 = kmalloc_node(memsize, GFP_KERNEL, node);
- if (!l3)
- goto bad;
- kmem_list3_init(l3);
- l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
- /*
- * The l3s don't come and go as CPUs come and
- * go. cache_chain_mutex is sufficient
- * protection here.
- */
- cachep->nodelists[node] = l3;
- }
-
- spin_lock_irq(&cachep->nodelists[node]->list_lock);
- cachep->nodelists[node]->free_limit =
- (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+ cachep->nodelists[node] = l3;
}
- /*
- * Now we can go ahead with allocating the shared arrays and
- * array caches
- */
- list_for_each_entry(cachep, &cache_chain, next) {
- struct array_cache *nc;
- struct array_cache *shared = NULL;
- struct array_cache **alien = NULL;
-
- nc = alloc_arraycache(node, cachep->limit,
- cachep->batchcount);
- if (!nc)
+ spin_lock_irq(&cachep->nodelists[node]->list_lock);
+ cachep->nodelists[node]->free_limit =
+ (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
+ spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+ }
+
+ /*
+ * Now we can go ahead with allocating the shared arrays and
+ * array caches
+ */
+ list_for_each_entry(cachep, &cache_chain, next) {
+ struct array_cache *nc;
+ struct array_cache *shared = NULL;
+ struct array_cache **alien = NULL;
+
+ nc = alloc_arraycache(node, cachep->limit,
+ cachep->batchcount);
+ if (!nc)
+ goto bad;
+ if (cachep->shared) {
+ shared = alloc_arraycache(node,
+ cachep->shared * cachep->batchcount,
+ 0xbaadf00d);
+ if (!shared) {
+ kfree(nc);
goto bad;
- if (cachep->shared) {
- shared = alloc_arraycache(node,
- cachep->shared * cachep->batchcount,
- 0xbaadf00d);
- if (!shared)
- goto bad;
}
- if (use_alien_caches) {
- alien = alloc_alien_cache(node, cachep->limit);
- if (!alien)
- goto bad;
- }
- cachep->array[cpu] = nc;
- l3 = cachep->nodelists[node];
- BUG_ON(!l3);
-
- spin_lock_irq(&l3->list_lock);
- if (!l3->shared) {
- /*
- * We are serialised from CPU_DEAD or
- * CPU_UP_CANCELLED by the cpucontrol lock
- */
- l3->shared = shared;
- shared = NULL;
+ }
+ if (use_alien_caches) {
+ alien = alloc_alien_cache(node, cachep->limit);
+ if (!alien) {
+ kfree(shared);
+ kfree(nc);
+ goto bad;
}
+ }
+ cachep->array[cpu] = nc;
+ l3 = cachep->nodelists[node];
+ BUG_ON(!l3);
+
+ spin_lock_irq(&l3->list_lock);
+ if (!l3->shared) {
+ /*
+ * We are serialised from CPU_DEAD or
+ * CPU_UP_CANCELLED by the cpucontrol lock
+ */
+ l3->shared = shared;
+ shared = NULL;
+ }
#ifdef CONFIG_NUMA
- if (!l3->alien) {
- l3->alien = alien;
- alien = NULL;
- }
-#endif
- spin_unlock_irq(&l3->list_lock);
- kfree(shared);
- free_alien_cache(alien);
+ if (!l3->alien) {
+ l3->alien = alien;
+ alien = NULL;
}
+#endif
+ spin_unlock_irq(&l3->list_lock);
+ kfree(shared);
+ free_alien_cache(alien);
+ }
+ return 0;
+bad:
+ cpuup_canceled(cpu);
+ return -ENOMEM;
+}
+
+static int __cpuinit cpuup_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+ int err = 0;
+
+ switch (action) {
+ case CPU_LOCK_ACQUIRE:
+ mutex_lock(&cache_chain_mutex);
+ break;
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = cpuup_prepare(cpu);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -1287,76 +1369,17 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
* structure is usually allocated from kmem_cache_create() and
* gets destroyed at kmem_cache_destroy().
*/
- /* fall thru */
+ /* fall through */
#endif
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
- list_for_each_entry(cachep, &cache_chain, next) {
- struct array_cache *nc;
- struct array_cache *shared;
- struct array_cache **alien;
- cpumask_t mask;
-
- mask = node_to_cpumask(node);
- /* cpu is dead; no one can alloc from it. */
- nc = cachep->array[cpu];
- cachep->array[cpu] = NULL;
- l3 = cachep->nodelists[node];
-
- if (!l3)
- goto free_array_cache;
-
- spin_lock_irq(&l3->list_lock);
-
- /* Free limit for this kmem_list3 */
- l3->free_limit -= cachep->batchcount;
- if (nc)
- free_block(cachep, nc->entry, nc->avail, node);
-
- if (!cpus_empty(mask)) {
- spin_unlock_irq(&l3->list_lock);
- goto free_array_cache;
- }
-
- shared = l3->shared;
- if (shared) {
- free_block(cachep, shared->entry,
- shared->avail, node);
- l3->shared = NULL;
- }
-
- alien = l3->alien;
- l3->alien = NULL;
-
- spin_unlock_irq(&l3->list_lock);
-
- kfree(shared);
- if (alien) {
- drain_alien_cache(cachep, alien);
- free_alien_cache(alien);
- }
-free_array_cache:
- kfree(nc);
- }
- /*
- * In the previous loop, all the objects were freed to
- * the respective cache's slabs, now we can go ahead and
- * shrink each nodelist to its limit.
- */
- list_for_each_entry(cachep, &cache_chain, next) {
- l3 = cachep->nodelists[node];
- if (!l3)
- continue;
- drain_freelist(cachep, l3, l3->free_objects);
- }
+ cpuup_canceled(cpu);
break;
case CPU_LOCK_RELEASE:
mutex_unlock(&cache_chain_mutex);
break;
}
- return NOTIFY_OK;
-bad:
- return NOTIFY_BAD;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpucache_notifier = {
@@ -3783,7 +3806,7 @@ const char *kmem_cache_name(struct kmem_cache *cachep)
EXPORT_SYMBOL_GPL(kmem_cache_name);
/*
- * This initializes kmem_list3 or resizes varioius caches for all nodes.
+ * This initializes kmem_list3 or resizes various caches for all nodes.
*/
static int alloc_kmemlist(struct kmem_cache *cachep)
{
diff --git a/mm/slub.c b/mm/slub.c
index e29a42988c78..aac1dd3c657d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -20,6 +20,7 @@
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
+#include <linux/memory.h>
/*
* Lock order:
@@ -2694,6 +2695,121 @@ int kmem_cache_shrink(struct kmem_cache *s)
}
EXPORT_SYMBOL(kmem_cache_shrink);
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+static int slab_mem_going_offline_callback(void *arg)
+{
+ struct kmem_cache *s;
+
+ down_read(&slub_lock);
+ list_for_each_entry(s, &slab_caches, list)
+ kmem_cache_shrink(s);
+ up_read(&slub_lock);
+
+ return 0;
+}
+
+static void slab_mem_offline_callback(void *arg)
+{
+ struct kmem_cache_node *n;
+ struct kmem_cache *s;
+ struct memory_notify *marg = arg;
+ int offline_node;
+
+ offline_node = marg->status_change_nid;
+
+ /*
+ * If the node still has available memory, we still need the
+ * kmem_cache_node for it.
+ */
+ if (offline_node < 0)
+ return;
+
+ down_read(&slub_lock);
+ list_for_each_entry(s, &slab_caches, list) {
+ n = get_node(s, offline_node);
+ if (n) {
+ /*
+ * if n->nr_slabs > 0, slabs still exist on the node
+ * that is going down. We were unable to free them,
+ * and offline_pages() function shouldn't call this
+ * callback. So, we must fail.
+ */
+ BUG_ON(atomic_read(&n->nr_slabs));
+
+ s->node[offline_node] = NULL;
+ kmem_cache_free(kmalloc_caches, n);
+ }
+ }
+ up_read(&slub_lock);
+}
+
+static int slab_mem_going_online_callback(void *arg)
+{
+ struct kmem_cache_node *n;
+ struct kmem_cache *s;
+ struct memory_notify *marg = arg;
+ int nid = marg->status_change_nid;
+ int ret = 0;
+
+ /*
+ * If the node's memory is already available, then kmem_cache_node is
+ * already created. Nothing to do.
+ */
+ if (nid < 0)
+ return 0;
+
+ /*
+ * We are bringing a node online. No memory is available yet. We must
+ * allocate a kmem_cache_node structure in order to bring the node
+ * online.
+ */
+ down_read(&slub_lock);
+ list_for_each_entry(s, &slab_caches, list) {
+ /*
+ * XXX: kmem_cache_alloc_node will fallback to other nodes
+ * since memory is not yet available from the node that
+ * is brought up.
+ */
+ n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL);
+ if (!n) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ init_kmem_cache_node(n);
+ s->node[nid] = n;
+ }
+out:
+ up_read(&slub_lock);
+ return ret;
+}
+
+static int slab_memory_callback(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ int ret = 0;
+
+ switch (action) {
+ case MEM_GOING_ONLINE:
+ ret = slab_mem_going_online_callback(arg);
+ break;
+ case MEM_GOING_OFFLINE:
+ ret = slab_mem_going_offline_callback(arg);
+ break;
+ case MEM_OFFLINE:
+ case MEM_CANCEL_ONLINE:
+ slab_mem_offline_callback(arg);
+ break;
+ case MEM_ONLINE:
+ case MEM_CANCEL_OFFLINE:
+ break;
+ }
+
+ ret = notifier_from_errno(ret);
+ return ret;
+}
+
+#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+
/********************************************************************
* Basic setup of slabs
*******************************************************************/
@@ -2715,6 +2831,8 @@ void __init kmem_cache_init(void)
sizeof(struct kmem_cache_node), GFP_KERNEL);
kmalloc_caches[0].refcount = -1;
caches++;
+
+ hotplug_memory_notifier(slab_memory_callback, 1);
#endif
/* Able to allocate the per node structures */
diff --git a/mm/swap.c b/mm/swap.c
index a65eff8a517a..9ac88323d237 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -5,7 +5,7 @@
*/
/*
- * This file contains the default values for the opereation of the
+ * This file contains the default values for the operation of the
* Linux VM subsystem. Fine-tuning documentation can be found in
* Documentation/sysctl/vm.txt.
* Started 18.12.91
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2e01af365848..af77e171e339 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -247,7 +247,7 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
EXPORT_SYMBOL_GPL(__get_vm_area);
/**
- * get_vm_area - reserve a contingous kernel virtual area
+ * get_vm_area - reserve a contiguous kernel virtual area
* @size: size of the area
* @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
*
@@ -303,7 +303,7 @@ found:
}
/**
- * remove_vm_area - find and remove a contingous kernel virtual area
+ * remove_vm_area - find and remove a contiguous kernel virtual area
* @addr: base address
*
* Search for the kernel VM area starting at @addr, and remove it.
@@ -364,7 +364,7 @@ static void __vunmap(void *addr, int deallocate_pages)
* vfree - release memory allocated by vmalloc()
* @addr: memory base address
*
- * Free the virtually contiguous memory area starting at @addr, as
+ * Free the virtually contiguous memory area starting at @addr, as
* obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
* NULL, no operation is performed.
*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e1471385d001..e5a9597e3bbc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -141,7 +141,7 @@ EXPORT_SYMBOL(unregister_shrinker);
* percentages of the lru and ageable caches. This should balance the seeks
* generated by these structures.
*
- * If the vm encounted mapped pages on the LRU it increase the pressure on
+ * If the vm encountered mapped pages on the LRU it increases the pressure on
* slab to avoid swapping.
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
@@ -1282,7 +1282,7 @@ out:
*/
if (priority < 0)
priority = 0;
- for (i = 0; zones[i] != 0; i++) {
+ for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))