diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 112 |
1 file changed, 99 insertions, 13 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e8985acdab8..094472377d81 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1616,6 +1616,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 	set_bit(i, zlc->fullzones);
 }
 
+/*
+ * clear all zones full, called after direct reclaim makes progress so that
+ * a zone that was recently full is not skipped over for up to a second
+ */
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
+
+	zlc = zonelist->zlcache_ptr;
+	if (!zlc)
+		return;
+
+	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1632,6 +1647,10 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 }
+
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1664,7 +1683,7 @@ zonelist_scan:
 				continue;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
-				goto try_next_zone;
+				continue;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
@@ -1676,17 +1695,36 @@ zonelist_scan:
 				    classzone_idx, alloc_flags))
 				goto try_this_zone;
 
+			if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+				/*
+				 * we do zlc_setup if there are multiple nodes
+				 * and before considering the first zone allowed
+				 * by the cpuset.
+				 */
+				allowednodes = zlc_setup(zonelist, alloc_flags);
+				zlc_active = 1;
+				did_zlc_setup = 1;
+			}
+
 			if (zone_reclaim_mode == 0)
 				goto this_zone_full;
 
+			/*
+			 * As we may have just activated ZLC, check if the first
+			 * eligible zone has failed zone_reclaim recently.
+			 */
+			if (NUMA_BUILD && zlc_active &&
+				!zlc_zone_worth_trying(zonelist, z, allowednodes))
+				continue;
+
 			ret = zone_reclaim(zone, gfp_mask, order);
 			switch (ret) {
 			case ZONE_RECLAIM_NOSCAN:
 				/* did not scan */
-				goto try_next_zone;
+				continue;
 			case ZONE_RECLAIM_FULL:
 				/* scanned but unreclaimable */
-				goto this_zone_full;
+				continue;
 			default:
 				/* did we reclaim enough */
 				if (!zone_watermark_ok(zone, order, mark,
@@ -1703,16 +1741,6 @@ try_this_zone:
 this_zone_full:
 		if (NUMA_BUILD)
 			zlc_mark_zone_full(zonelist, z);
-try_next_zone:
-		if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
-			/*
-			 * we do zlc_setup after the first zone is tried but only
-			 * if there are multiple nodes make it worthwhile
-			 */
-			allowednodes = zlc_setup(zonelist, alloc_flags);
-			zlc_active = 1;
-			did_zlc_setup = 1;
-		}
 	}
 
 	if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
@@ -1954,6 +1982,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!(*did_some_progress)))
 		return NULL;
 
+	/* After successful reclaim, reconsider all zones for allocation */
+	if (NUMA_BUILD)
+		zlc_clear_zones_full(zonelist);
+
 retry:
 	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,
@@ -4585,6 +4617,60 @@ void __init sort_node_map(void)
 			cmp_node_active_region, NULL);
 }
 
+/**
+ * node_map_pfn_alignment - determine the maximum internode alignment
+ *
+ * This function should be called after node map is populated and sorted.
+ * It calculates the maximum power of two alignment which can distinguish
+ * all the nodes.
+ *
+ * For example, if all nodes are 1GiB and aligned to 1GiB, the return value
+ * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the
+ * nodes are shifted by 256MiB, 256MiB. Note that if only the last node is
+ * shifted, 1GiB is enough and this function will indicate so.
+ *
+ * This is used to test whether pfn -> nid mapping of the chosen memory
+ * model has fine enough granularity to avoid incorrect mapping for the
+ * populated node map.
+ *
+ * Returns the determined alignment in pfn's. 0 if there is no alignment
+ * requirement (single node).
+ */
+unsigned long __init node_map_pfn_alignment(void)
+{
+	unsigned long accl_mask = 0, last_end = 0;
+	int last_nid = -1;
+	int i;
+
+	for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
+		int nid = early_node_map[i].nid;
+		unsigned long start = early_node_map[i].start_pfn;
+		unsigned long end = early_node_map[i].end_pfn;
+		unsigned long mask;
+
+		if (!start || last_nid < 0 || last_nid == nid) {
+			last_nid = nid;
+			last_end = end;
+			continue;
+		}
+
+		/*
+		 * Start with a mask granular enough to pin-point to the
+		 * start pfn and tick off bits one-by-one until it becomes
+		 * too coarse to separate the current node from the last.
+		 */
+		mask = ~((1 << __ffs(start)) - 1);
+		while (mask && last_end <= (start & (mask << 1)))
+			mask <<= 1;
+
+		/* accumulate all internode masks */
+		accl_mask |= mask;
+	}
+
+	/* convert mask to number of pages */
+	return ~accl_mask + 1;
+}
+
 /* Find the lowest pfn for a node */
 static unsigned long __init find_min_pfn_for_node(int nid)
 {