diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 161 |
1 files changed, 81 insertions, 80 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 17a3c66639a9..ce63d603820f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; -/* - * When calculating the number of globally allowed dirty pages, there - * is a certain number of per-zone reserves that should not be - * considered dirtyable memory. This is the sum of those reserves - * over all existing zones that contribute dirtyable memory. - */ -unsigned long dirty_balance_reserve __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; @@ -812,7 +805,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, do { int mt; /* migratetype of the to-be-freed page */ - page = list_entry(list->prev, struct page, lru); + page = list_last_entry(list, struct page, lru); /* must delete as __free_one_page list manipulates */ list_del(&page->lru); @@ -1417,11 +1410,10 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order < MAX_ORDER; ++current_order) { area = &(zone->free_area[current_order]); - if (list_empty(&area->free_list[migratetype])) - continue; - - page = list_entry(area->free_list[migratetype].next, + page = list_first_entry_or_null(&area->free_list[migratetype], struct page, lru); + if (!page) + continue; list_del(&page->lru); rmv_page_order(page); area->nr_free--; @@ -1700,12 +1692,12 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac) for (order = 0; order < MAX_ORDER; order++) { struct free_area *area = &(zone->free_area[order]); - if (list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + page = list_first_entry_or_null( + &area->free_list[MIGRATE_HIGHATOMIC], + struct page, lru); + if (!page) continue; - page = list_entry(area->free_list[MIGRATE_HIGHATOMIC].next, - struct page, lru); - /* * It should never happen but changes to locking could * inadvertently allow a per-cpu drain to add pages @@ -1753,7 +1745,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) if (fallback_mt == -1) continue; - page = list_entry(area->free_list[fallback_mt].next, + page = list_first_entry(&area->free_list[fallback_mt], struct page, lru); if (can_steal) steal_suitable_fallback(zone, page, start_migratetype); @@ -1788,7 +1780,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) * Call me with the zone->lock already held. */ static struct page *__rmqueue(struct zone *zone, unsigned int order, - int migratetype, gfp_t gfp_flags) + int migratetype) { struct page *page; @@ -1818,7 +1810,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, spin_lock(&zone->lock); for (i = 0; i < count; ++i) { - struct page *page = __rmqueue(zone, order, migratetype, 0); + struct page *page = __rmqueue(zone, order, migratetype); if (unlikely(page == NULL)) break; @@ -1988,7 +1980,7 @@ void mark_free_pages(struct zone *zone) unsigned long pfn, max_zone_pfn; unsigned long flags; unsigned int order, t; - struct list_head *curr; + struct page *page; if (zone_is_empty(zone)) return; @@ -1998,17 +1990,17 @@ void mark_free_pages(struct zone *zone) max_zone_pfn = zone_end_pfn(zone); for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - + page = pfn_to_page(pfn); if (!swsusp_page_is_forbidden(page)) swsusp_unset_page_free(page); } for_each_migratetype_order(order, t) { - list_for_each(curr, &zone->free_area[order].free_list[t]) { + list_for_each_entry(page, + &zone->free_area[order].free_list[t], lru) { unsigned long i; - pfn = page_to_pfn(list_entry(curr, struct page, lru)); + pfn = page_to_pfn(page); for (i = 0; i < (1UL << order); i++) swsusp_set_page_free(pfn_to_page(pfn + i)); } @@ -2212,9 +2204,9 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, } if (cold) - page = list_entry(list->prev, struct page, lru); + page = list_last_entry(list, struct page, lru); else - page = list_entry(list->next, struct page, lru); + page = list_first_entry(list, struct page, lru); list_del(&page->lru); pcp->count--; @@ -2241,7 +2233,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, trace_mm_page_alloc_zone_locked(page, order, migratetype); } if (!page) - page = __rmqueue(zone, order, migratetype, gfp_flags); + page = __rmqueue(zone, order, migratetype); spin_unlock(&zone->lock); if (!page) goto failed; @@ -2740,8 +2732,21 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) + if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { *did_some_progress = 1; + + if (gfp_mask & __GFP_NOFAIL) { + page = get_page_from_freelist(gfp_mask, order, + ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac); + /* + * fallback to ignore cpuset restriction if our nodes + * are depleted + */ + if (!page) + page = get_page_from_freelist(gfp_mask, order, + ALLOC_NO_WATERMARKS, ac); + } + } out: mutex_unlock(&oom_lock); return page; @@ -2876,28 +2881,6 @@ retry: return page; } -/* - * This is called in the allocator slow-path if the allocation request is of - * sufficient urgency to ignore watermarks and take other desperate measures - */ -static inline struct page * -__alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, - const struct alloc_context *ac) -{ - struct page *page; - - do { - page = get_page_from_freelist(gfp_mask, order, - ALLOC_NO_WATERMARKS, ac); - - if (!page && gfp_mask & __GFP_NOFAIL) - wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, - HZ/50); - } while (!page && (gfp_mask & __GFP_NOFAIL)); - - return page; -} - static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) { struct zoneref *z; @@ -3042,28 +3025,36 @@ retry: * allocations are system rather than user orientated */ ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); - - page = __alloc_pages_high_priority(gfp_mask, order, ac); - - if (page) { + page = get_page_from_freelist(gfp_mask, order, + ALLOC_NO_WATERMARKS, ac); + if (page) goto got_pg; - } } /* Caller is not willing to reclaim, we can't balance anything */ if (!can_direct_reclaim) { /* - * All existing users of the deprecated __GFP_NOFAIL are - * blockable, so warn of any new users that actually allow this - * type of allocation to fail. + * All existing users of the __GFP_NOFAIL are blockable, so warn + * of any new users that actually allow this type of allocation + * to fail. */ WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL); goto nopage; } /* Avoid recursion of direct reclaim */ - if (current->flags & PF_MEMALLOC) + if (current->flags & PF_MEMALLOC) { + /* + * __GFP_NOFAIL request from this context is rather bizarre + * because we cannot reclaim anything and only can loop waiting + * for somebody to do a work for us. + */ + if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { + cond_resched(); + goto retry; + } goto nopage; + } /* Avoid allocations with no watermarks from looping endlessly */ if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) @@ -3402,7 +3393,8 @@ EXPORT_SYMBOL(__free_page_frag); /* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter - * of the current memory cgroup. + * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is + * equivalent to alloc_pages. * * It should be used when the caller would like to use kmalloc, but since the * allocation is large, it has to fall back to the page allocator. @@ -3647,8 +3639,9 @@ static void show_migration_types(unsigned char type) { static const char types[MIGRATE_TYPES] = { [MIGRATE_UNMOVABLE] = 'U', - [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_MOVABLE] = 'M', + [MIGRATE_RECLAIMABLE] = 'E', + [MIGRATE_HIGHATOMIC] = 'H', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif @@ -4146,8 +4139,7 @@ static void set_zonelist_order(void) static void build_zonelists(pg_data_t *pgdat) { - int j, node, load; - enum zone_type i; + int i, node, load; nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; @@ -4167,7 +4159,7 @@ static void build_zonelists(pg_data_t *pgdat) nodes_clear(used_mask); memset(node_order, 0, sizeof(node_order)); - j = 0; + i = 0; while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { /* @@ -4184,12 +4176,12 @@ static void build_zonelists(pg_data_t *pgdat) if (order == ZONELIST_ORDER_NODE) build_zonelists_in_node_order(pgdat, node); else - node_order[j++] = node; /* remember order */ + node_order[i++] = node; /* remember order */ } if (order == ZONELIST_ORDER_ZONE) { /* calculate node order -- i.e., DMA last! */ - build_zonelists_in_zone_order(pgdat, j); + build_zonelists_in_zone_order(pgdat, i); } build_thisnode_zonelists(pgdat); @@ -5955,20 +5947,12 @@ static void calculate_totalreserve_pages(void) if (max > zone->managed_pages) max = zone->managed_pages; + + zone->totalreserve_pages = max; + reserve_pages += max; - /* - * Lowmem reserves are not available to - * GFP_HIGHUSER page cache allocations and - * kswapd tries to balance zones to their high - * watermark. As a result, neither should be - * regarded as dirtyable memory, to prevent a - * situation where reclaim has to clean pages - * in order to balance the zones. - */ - zone->dirty_balance_reserve = max; } } - dirty_balance_reserve = reserve_pages; totalreserve_pages = reserve_pages; } @@ -6723,8 +6707,12 @@ int alloc_contig_range(unsigned long start, unsigned long end, if (ret) return ret; + /* + * In case of -EBUSY, we'd like to know which page causes problem. + * So, just fall through. We will check it in test_pages_isolated(). + */ ret = __alloc_contig_migrate_range(&cc, start, end); - if (ret) + if (ret && ret != -EBUSY) goto done; /* @@ -6751,12 +6739,25 @@ int alloc_contig_range(unsigned long start, unsigned long end, outer_start = start; while (!PageBuddy(pfn_to_page(outer_start))) { if (++order >= MAX_ORDER) { - ret = -EBUSY; - goto done; + outer_start = start; + break; } outer_start &= ~0UL << order; } + if (outer_start != start) { + order = page_order(pfn_to_page(outer_start)); + + /* + * outer_start page could be small order buddy page and + * it doesn't include start page. Adjust outer_start + * in this case to report failed page properly + * on tracepoint in test_pages_isolated() + */ + if (outer_start + (1UL << order) <= start) + outer_start = start; + } + /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { pr_info("%s: [%lx, %lx) PFNs busy\n", |