summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile2
-rw-r--r--mm/allocpercpu.c3
-rw-r--r--mm/maccess.c55
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/pdflush.c8
-rw-r--r--mm/slab.c5
-rw-r--r--mm/slub.c97
-rw-r--r--mm/vmscan.c18
9 files changed, 140 insertions, 56 deletions
diff --git a/mm/Makefile b/mm/Makefile
index a5b0dd93427a..18c143b3c46c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,7 +8,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
vmalloc.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
- page_alloc.o page-writeback.o pdflush.o \
+ maccess.o page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
page_isolation.o $(mmu-y)
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index b0012e27fea8..f4026bae6eed 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -82,9 +82,10 @@ EXPORT_SYMBOL_GPL(percpu_populate);
int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
cpumask_t *mask)
{
- cpumask_t populated = CPU_MASK_NONE;
+ cpumask_t populated;
int cpu;
+ cpus_clear(populated);
for_each_cpu_mask(cpu, *mask)
if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
__percpu_depopulate_mask(__pdata, &populated);
diff --git a/mm/maccess.c b/mm/maccess.c
new file mode 100644
index 000000000000..ac40796cfb15
--- /dev/null
+++ b/mm/maccess.c
@@ -0,0 +1,55 @@
+/*
+ * Access kernel memory without faulting.
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+/**
+ * probe_kernel_read(): safely attempt to read from a location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_kernel_read(void *dst, void *src, size_t size)
+{
+ long ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst,
+ (__force const void __user *)src, size);
+ pagefault_enable();
+ set_fs(old_fs);
+
+ return ret ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(probe_kernel_read);
+
+/**
+ * probe_kernel_write(): safely attempt to write to a location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_kernel_write(void *dst, void *src, size_t size)
+{
+ long ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(KERNEL_DS);
+ pagefault_disable();
+ ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+ pagefault_enable();
+ set_fs(old_fs);
+
+ return ret ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(probe_kernel_write);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7469c503580d..0fb330271271 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -208,7 +208,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
/*
* This doesn't need a lock to do pfn_to_page().
* The section can't be removed here because of the
- * memory_block->state_sem.
+ * memory_block->state_mutex.
*/
zone = page_zone(pfn_to_page(pfn));
pgdat_resize_lock(zone->zone_pgdat, &flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 402a504f1228..32e796af12a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2029,6 +2029,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
int n, val;
int min_val = INT_MAX;
int best_node = -1;
+ node_to_cpumask_ptr(tmp, 0);
/* Use the local node if we haven't already */
if (!node_isset(node, *used_node_mask)) {
@@ -2037,7 +2038,6 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
}
for_each_node_state(n, N_HIGH_MEMORY) {
- cpumask_t tmp;
/* Don't want a node to appear more than once */
if (node_isset(n, *used_node_mask))
@@ -2050,8 +2050,8 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
val += (n < node);
/* Give preference to headless and unused nodes */
- tmp = node_to_cpumask(n);
- if (!cpus_empty(tmp))
+ node_to_cpumask_ptr_next(tmp, n);
+ if (!cpus_empty(*tmp))
val += PENALTY_FOR_NODE_WITH_CPUS;
/* Slight preference for less loaded node */
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 8f6ee073c0e3..1c96cfc9e040 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -17,8 +17,8 @@
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/fs.h> // Needed by writeback.h
-#include <linux/writeback.h> // Prototypes pdflush_operation()
+#include <linux/fs.h> /* Needed by writeback.h */
+#include <linux/writeback.h> /* Prototypes pdflush_operation() */
#include <linux/kthread.h>
#include <linux/cpuset.h>
#include <linux/freezer.h>
@@ -187,8 +187,8 @@ static int pdflush(void *dummy)
* This is needed as pdflush's are dynamically created and destroyed.
* The boottime pdflush's are easily placed w/o these 2 lines.
*/
- cpus_allowed = cpuset_cpus_allowed(current);
- set_cpus_allowed(current, cpus_allowed);
+ cpuset_cpus_allowed(current, &cpus_allowed);
+ set_cpus_allowed_ptr(current, &cpus_allowed);
return __pdflush(&my_work);
}
diff --git a/mm/slab.c b/mm/slab.c
index 04b308c3bc54..03927cb5ec9e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1160,14 +1160,13 @@ static void __cpuinit cpuup_canceled(long cpu)
struct kmem_cache *cachep;
struct kmem_list3 *l3 = NULL;
int node = cpu_to_node(cpu);
+ node_to_cpumask_ptr(mask, node);
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
struct array_cache *shared;
struct array_cache **alien;
- cpumask_t mask;
- mask = node_to_cpumask(node);
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
@@ -1183,7 +1182,7 @@ static void __cpuinit cpuup_canceled(long cpu)
if (nc)
free_block(cachep, nc->entry, nc->avail, node);
- if (!cpus_empty(mask)) {
+ if (!cpus_empty(*mask)) {
spin_unlock_irq(&l3->list_lock);
goto free_array_cache;
}
diff --git a/mm/slub.c b/mm/slub.c
index acc975fcc8cc..7f8aaa291a4e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -837,6 +837,35 @@ static void remove_full(struct kmem_cache *s, struct page *page)
spin_unlock(&n->list_lock);
}
+/* Tracking of the number of slabs for debugging purposes */
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ return atomic_long_read(&n->nr_slabs);
+}
+
+static inline void inc_slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ /*
+ * May be called early in order to allocate a slab for the
+ * kmem_cache_node structure. Solve the chicken-egg
+ * dilemma by deferring the increment of the count during
+ * bootstrap (see early_kmem_cache_node_alloc).
+ */
+ if (!NUMA_BUILD || n)
+ atomic_long_inc(&n->nr_slabs);
+}
+static inline void dec_slabs_node(struct kmem_cache *s, int node)
+{
+ struct kmem_cache_node *n = get_node(s, node);
+
+ atomic_long_dec(&n->nr_slabs);
+}
+
+/* Object debug checks for alloc/free paths */
static void setup_object_debug(struct kmem_cache *s, struct page *page,
void *object)
{
@@ -1028,6 +1057,11 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
return flags;
}
#define slub_debug 0
+
+static inline unsigned long slabs_node(struct kmem_cache *s, int node)
+ { return 0; }
+static inline void inc_slabs_node(struct kmem_cache *s, int node) {}
+static inline void dec_slabs_node(struct kmem_cache *s, int node) {}
#endif
/*
* Slab allocation and freeing
@@ -1066,7 +1100,6 @@ static void setup_object(struct kmem_cache *s, struct page *page,
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
struct page *page;
- struct kmem_cache_node *n;
void *start;
void *last;
void *p;
@@ -1078,9 +1111,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!page)
goto out;
- n = get_node(s, page_to_nid(page));
- if (n)
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(s, page_to_nid(page));
page->slab = s;
page->flags |= 1 << PG_slab;
if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1125,6 +1156,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages);
+ __ClearPageSlab(page);
+ reset_page_mapcount(page);
__free_pages(page, s->order);
}
@@ -1151,11 +1184,7 @@ static void free_slab(struct kmem_cache *s, struct page *page)
static void discard_slab(struct kmem_cache *s, struct page *page)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
- atomic_long_dec(&n->nr_slabs);
- reset_page_mapcount(page);
- __ClearPageSlab(page);
+ dec_slabs_node(s, page_to_nid(page));
free_slab(s, page);
}
@@ -1886,15 +1915,18 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
c->node = 0;
c->offset = s->offset / sizeof(void *);
c->objsize = s->objsize;
+#ifdef CONFIG_SLUB_STATS
+ memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
+#endif
}
static void init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
- atomic_long_set(&n->nr_slabs, 0);
spin_lock_init(&n->list_lock);
INIT_LIST_HEAD(&n->partial);
#ifdef CONFIG_SLUB_DEBUG
+ atomic_long_set(&n->nr_slabs, 0);
INIT_LIST_HEAD(&n->full);
#endif
}
@@ -2063,7 +2095,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
init_tracking(kmalloc_caches, n);
#endif
init_kmem_cache_node(n);
- atomic_long_inc(&n->nr_slabs);
+ inc_slabs_node(kmalloc_caches, node);
/*
* lockdep requires consistent irq usage for each lock
@@ -2376,7 +2408,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
struct kmem_cache_node *n = get_node(s, node);
n->nr_partial -= free_list(s, n, &n->partial);
- if (atomic_long_read(&n->nr_slabs))
+ if (slabs_node(s, node))
return 1;
}
free_kmem_cache_nodes(s);
@@ -2409,10 +2441,6 @@ EXPORT_SYMBOL(kmem_cache_destroy);
struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
-#endif
-
static int __init setup_slub_min_order(char *str)
{
get_option(&str, &slub_min_order);
@@ -2472,6 +2500,7 @@ panic:
}
#ifdef CONFIG_ZONE_DMA
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
static void sysfs_add_func(struct work_struct *w)
{
@@ -2688,21 +2717,6 @@ void kfree(const void *x)
}
EXPORT_SYMBOL(kfree);
-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLABINFO)
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += page->inuse;
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
-}
-#endif
-
/*
* kmem_cache_shrink removes empty slabs from the partial lists and sorts
* the remaining slabs by the number of items in use. The slabs with the
@@ -2816,7 +2830,7 @@ static void slab_mem_offline_callback(void *arg)
* and offline_pages() function shoudn't call this
* callback. So, we must fail.
*/
- BUG_ON(atomic_long_read(&n->nr_slabs));
+ BUG_ON(slabs_node(s, offline_node));
s->node[offline_node] = NULL;
kmem_cache_free(kmalloc_caches, n);
@@ -3181,6 +3195,21 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
return slab_alloc(s, gfpflags, node, caller);
}
+#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += page->inuse;
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+#endif
+
#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
@@ -3979,10 +4008,12 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
len = sprintf(buf, "%lu", sum);
+#ifdef CONFIG_SMP
for_each_online_cpu(cpu) {
if (data[cpu] && len < PAGE_SIZE - 20)
- len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+ len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
}
+#endif
kfree(data);
return len + sprintf(buf + len, "\n");
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4046434046e6..f80a5b7c057f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1647,11 +1647,10 @@ static int kswapd(void *p)
struct reclaim_state reclaim_state = {
.reclaimed_slab = 0,
};
- cpumask_t cpumask;
+ node_to_cpumask_ptr(cpumask, pgdat->node_id);
- cpumask = node_to_cpumask(pgdat->node_id);
- if (!cpus_empty(cpumask))
- set_cpus_allowed(tsk, cpumask);
+ if (!cpus_empty(*cpumask))
+ set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state;
/*
@@ -1880,17 +1879,16 @@ out:
static int __devinit cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
- pg_data_t *pgdat;
- cpumask_t mask;
int nid;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
for_each_node_state(nid, N_HIGH_MEMORY) {
- pgdat = NODE_DATA(nid);
- mask = node_to_cpumask(pgdat->node_id);
- if (any_online_cpu(mask) != NR_CPUS)
+ pg_data_t *pgdat = NODE_DATA(nid);
+ node_to_cpumask_ptr(mask, pgdat->node_id);
+
+ if (any_online_cpu(*mask) < nr_cpu_ids)
/* One of our CPUs online: restore mask */
- set_cpus_allowed(pgdat->kswapd, mask);
+ set_cpus_allowed_ptr(pgdat->kswapd, mask);
}
}
return NOTIFY_OK;