summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-11-28 21:46:22 +0400
committerTejun Heo <tj@kernel.org>2011-11-28 21:46:22 +0400
commitd4bbf7e7759afc172e2bfbc5c416324590049cdd (patch)
tree7eab5ee5481cd3dcf1162329fec827177640018a /mm/page_alloc.c
parenta150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff)
parent401d0069cb344f401bc9d264c31db55876ff78c0 (diff)
downloadlinux-d4bbf7e7759afc172e2bfbc5c416324590049cdd.tar.xz
Merge branch 'master' into x86/memblock
Conflicts & resolutions: * arch/x86/xen/setup.c dc91c728fd "xen: allow extra memory to be in multiple regions" 24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..." conflicted on xen_add_extra_mem() updates. The resolution is trivial as the latter just want to replace memblock_x86_reserve_range() with memblock_reserve(). * drivers/pci/intel-iommu.c 166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/" 5dfe8660a3d "bootmem: Replace work_with_active_regions() with..." conflicted as the former moved the file under drivers/iommu/. Resolved by applying the chnages from the latter on the moved file. * mm/Kconfig 6661672053a "memblock: add NO_BOOTMEM config symbol" c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option" conflicted trivially. Both added config options. Just letting both add their own options resolves the conflict. * mm/memblock.c d1f0ece6cdc "mm/memblock.c: small function definition fixes" ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()" confliected. The former updates function removed by the latter. Resolution is trivial. Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c135
1 files changed, 78 insertions, 57 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c7ea45ffba9..6ce27331834c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -321,6 +321,7 @@ static void bad_page(struct page *page)
current->comm, page_to_pfn(page));
dump_page(page);
+ print_modules();
dump_stack();
out:
/* Leave bad fields for debug, except PageBuddy could make trouble */
@@ -1373,21 +1374,12 @@ failed:
#ifdef CONFIG_FAIL_PAGE_ALLOC
-static struct fail_page_alloc_attr {
+static struct {
struct fault_attr attr;
u32 ignore_gfp_highmem;
u32 ignore_gfp_wait;
u32 min_order;
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
- struct dentry *ignore_gfp_highmem_file;
- struct dentry *ignore_gfp_wait_file;
- struct dentry *min_order_file;
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
} fail_page_alloc = {
.attr = FAULT_ATTR_INITIALIZER,
.ignore_gfp_wait = 1,
@@ -1421,36 +1413,27 @@ static int __init fail_page_alloc_debugfs(void)
{
mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
struct dentry *dir;
- int err;
-
- err = init_fault_attr_dentries(&fail_page_alloc.attr,
- "fail_page_alloc");
- if (err)
- return err;
- dir = fail_page_alloc.attr.dentries.dir;
-
- fail_page_alloc.ignore_gfp_wait_file =
- debugfs_create_bool("ignore-gfp-wait", mode, dir,
- &fail_page_alloc.ignore_gfp_wait);
-
- fail_page_alloc.ignore_gfp_highmem_file =
- debugfs_create_bool("ignore-gfp-highmem", mode, dir,
- &fail_page_alloc.ignore_gfp_highmem);
- fail_page_alloc.min_order_file =
- debugfs_create_u32("min-order", mode, dir,
- &fail_page_alloc.min_order);
-
- if (!fail_page_alloc.ignore_gfp_wait_file ||
- !fail_page_alloc.ignore_gfp_highmem_file ||
- !fail_page_alloc.min_order_file) {
- err = -ENOMEM;
- debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
- debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
- debugfs_remove(fail_page_alloc.min_order_file);
- cleanup_fault_attr_dentries(&fail_page_alloc.attr);
- }
- return err;
+ dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+ &fail_page_alloc.attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
+ &fail_page_alloc.ignore_gfp_wait))
+ goto fail;
+ if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+ &fail_page_alloc.ignore_gfp_highmem))
+ goto fail;
+ if (!debugfs_create_u32("min-order", mode, dir,
+ &fail_page_alloc.min_order))
+ goto fail;
+
+ return 0;
+fail:
+ debugfs_remove_recursive(dir);
+
+ return -ENOMEM;
}
late_initcall(fail_page_alloc_debugfs);
@@ -1619,6 +1602,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
set_bit(i, zlc->fullzones);
}
+/*
+ * clear all zones full, called after direct reclaim makes progress so that
+ * a zone that was recently full is not skipped over for up to a second
+ */
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+ struct zonelist_cache *zlc; /* cached zonelist speedup info */
+
+ zlc = zonelist->zlcache_ptr;
+ if (!zlc)
+ return;
+
+ bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+}
+
#else /* CONFIG_NUMA */
static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1635,6 +1633,10 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
}
+
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+}
#endif /* CONFIG_NUMA */
/*
@@ -1667,7 +1669,7 @@ zonelist_scan:
continue;
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed_softwall(zone, gfp_mask))
- goto try_next_zone;
+ continue;
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
@@ -1679,17 +1681,36 @@ zonelist_scan:
classzone_idx, alloc_flags))
goto try_this_zone;
+ if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+ /*
+ * we do zlc_setup if there are multiple nodes
+ * and before considering the first zone allowed
+ * by the cpuset.
+ */
+ allowednodes = zlc_setup(zonelist, alloc_flags);
+ zlc_active = 1;
+ did_zlc_setup = 1;
+ }
+
if (zone_reclaim_mode == 0)
goto this_zone_full;
+ /*
+ * As we may have just activated ZLC, check if the first
+ * eligible zone has failed zone_reclaim recently.
+ */
+ if (NUMA_BUILD && zlc_active &&
+ !zlc_zone_worth_trying(zonelist, z, allowednodes))
+ continue;
+
ret = zone_reclaim(zone, gfp_mask, order);
switch (ret) {
case ZONE_RECLAIM_NOSCAN:
/* did not scan */
- goto try_next_zone;
+ continue;
case ZONE_RECLAIM_FULL:
/* scanned but unreclaimable */
- goto this_zone_full;
+ continue;
default:
/* did we reclaim enough */
if (!zone_watermark_ok(zone, order, mark,
@@ -1706,16 +1727,6 @@ try_this_zone:
this_zone_full:
if (NUMA_BUILD)
zlc_mark_zone_full(zonelist, z);
-try_next_zone:
- if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
- /*
- * we do zlc_setup after the first zone is tried but only
- * if there are multiple nodes make it worthwhile
- */
- allowednodes = zlc_setup(zonelist, alloc_flags);
- zlc_active = 1;
- did_zlc_setup = 1;
- }
}
if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
@@ -1746,7 +1757,6 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
{
- va_list args;
unsigned int filter = SHOW_MEM_FILTER_NODES;
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
@@ -1765,14 +1775,21 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
filter &= ~SHOW_MEM_FILTER_NODES;
if (fmt) {
- printk(KERN_WARNING);
+ struct va_format vaf;
+ va_list args;
+
va_start(args, fmt);
- vprintk(fmt, args);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_warn("%pV", &vaf);
+
va_end(args);
}
- pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
- current->comm, order, gfp_mask);
+ pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+ current->comm, order, gfp_mask);
dump_stack();
if (!should_suppress_show_mem())
@@ -1957,6 +1974,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
if (unlikely(!(*did_some_progress)))
return NULL;
+ /* After successful reclaim, reconsider all zones for allocation */
+ if (NUMA_BUILD)
+ zlc_clear_zones_full(zonelist);
+
retry:
page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx,