author     Linus Torvalds <torvalds@linux-foundation.org>  2014-06-08 22:31:16 +0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-06-08 22:31:16 +0400
commit     3f17ea6dea8ba5668873afa54628a91aaa3fb1c0
tree       afbeb2accd4c2199ddd705ae943995b143a0af02 /mm/hugetlb.c
parent     1860e379875dfe7271c649058aeddffe5afd9d0d
parent     1a5700bc2d10cd379a795fd2bb377a190af5acd4
download   linux-3f17ea6dea8ba5668873afa54628a91aaa3fb1c0.tar.xz
Merge branch 'next' (accumulated 3.16 merge window patches) into master
Now that 3.15 is released, this merges the 'next' branch into 'master',
bringing us to the normal situation where my 'master' branch is the
merge window.
* accumulated work in next: (6809 commits)
ufs: sb mutex merge + mutex_destroy
powerpc: update comments for generic idle conversion
cris: update comments for generic idle conversion
idle: remove cpu_idle() forward declarations
nbd: zero from and len fields in NBD_CMD_DISCONNECT.
mm: convert some level-less printks to pr_*
MAINTAINERS: adi-buildroot-devel is moderated
MAINTAINERS: add linux-api for review of API/ABI changes
mm/kmemleak-test.c: use pr_fmt for logging
fs/dlm/debug_fs.c: replace seq_printf by seq_puts
fs/dlm/lockspace.c: convert simple_str to kstr
fs/dlm/config.c: convert simple_str to kstr
mm: mark remap_file_pages() syscall as deprecated
mm: memcontrol: remove unnecessary memcg argument from soft limit functions
mm: memcontrol: clean up memcg zoneinfo lookup
mm/memblock.c: call kmemleak directly from memblock_(alloc|free)
mm/mempool.c: update the kmemleak stack trace for mempool allocations
lib/radix-tree.c: update the kmemleak stack trace for radix tree allocations
mm: introduce kmemleak_update_trace()
mm/kmemleak.c: use %u to print ->checksum
...
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--   mm/hugetlb.c   363
1 file changed, 253 insertions(+), 110 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c82290b9c1fc..226910cb7c9b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -544,7 +544,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-        if (hugepages_treat_as_movable || hugepage_migration_support(h))
+        if (hugepages_treat_as_movable || hugepage_migration_supported(h))
                 return GFP_HIGHUSER_MOVABLE;
         else
                 return GFP_HIGHUSER;
@@ -607,25 +607,242 @@ err:
         return NULL;
 }
 
+/*
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed.  Ensure that we use an allowed
+ * node for alloc or free.
+ */
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+        nid = next_node(nid, *nodes_allowed);
+        if (nid == MAX_NUMNODES)
+                nid = first_node(*nodes_allowed);
+        VM_BUG_ON(nid >= MAX_NUMNODES);
+
+        return nid;
+}
+
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+        if (!node_isset(nid, *nodes_allowed))
+                nid = next_node_allowed(nid, nodes_allowed);
+        return nid;
+}
+
+/*
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
+ */
+static int hstate_next_node_to_alloc(struct hstate *h,
+                                        nodemask_t *nodes_allowed)
+{
+        int nid;
+
+        VM_BUG_ON(!nodes_allowed);
+
+        nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+        h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
+
+        return nid;
+}
+
+/*
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page.  Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
+ */
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
+{
+        int nid;
+
+        VM_BUG_ON(!nodes_allowed);
+
+        nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+        h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
+
+        return nid;
+}
+
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
+        for (nr_nodes = nodes_weight(*mask);                            \
+                nr_nodes > 0 &&                                         \
+                ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
+                nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)            \
+        for (nr_nodes = nodes_weight(*mask);                            \
+                nr_nodes > 0 &&                                         \
+                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
+                nr_nodes--)
+
+#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
+static void destroy_compound_gigantic_page(struct page *page,
+                                        unsigned long order)
+{
+        int i;
+        int nr_pages = 1 << order;
+        struct page *p = page + 1;
+
+        for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+                __ClearPageTail(p);
+                set_page_refcounted(p);
+                p->first_page = NULL;
+        }
+
+        set_compound_order(page, 0);
+        __ClearPageHead(page);
+}
+
+static void free_gigantic_page(struct page *page, unsigned order)
+{
+        free_contig_range(page_to_pfn(page), 1 << order);
+}
+
+static int __alloc_gigantic_page(unsigned long start_pfn,
+                                unsigned long nr_pages)
+{
+        unsigned long end_pfn = start_pfn + nr_pages;
+        return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+}
+
+static bool pfn_range_valid_gigantic(unsigned long start_pfn,
+                                unsigned long nr_pages)
+{
+        unsigned long i, end_pfn = start_pfn + nr_pages;
+        struct page *page;
+
+        for (i = start_pfn; i < end_pfn; i++) {
+                if (!pfn_valid(i))
+                        return false;
+
+                page = pfn_to_page(i);
+
+                if (PageReserved(page))
+                        return false;
+
+                if (page_count(page) > 0)
+                        return false;
+
+                if (PageHuge(page))
+                        return false;
+        }
+
+        return true;
+}
+
+static bool zone_spans_last_pfn(const struct zone *zone,
+                        unsigned long start_pfn, unsigned long nr_pages)
+{
+        unsigned long last_pfn = start_pfn + nr_pages - 1;
+        return zone_spans_pfn(zone, last_pfn);
+}
+
+static struct page *alloc_gigantic_page(int nid, unsigned order)
+{
+        unsigned long nr_pages = 1 << order;
+        unsigned long ret, pfn, flags;
+        struct zone *z;
+
+        z = NODE_DATA(nid)->node_zones;
+        for (; z - NODE_DATA(nid)->node_zones < MAX_NR_ZONES; z++) {
+                spin_lock_irqsave(&z->lock, flags);
+
+                pfn = ALIGN(z->zone_start_pfn, nr_pages);
+                while (zone_spans_last_pfn(z, pfn, nr_pages)) {
+                        if (pfn_range_valid_gigantic(pfn, nr_pages)) {
+                                /*
+                                 * We release the zone lock here because
+                                 * alloc_contig_range() will also lock the zone
+                                 * at some point. If there's an allocation
+                                 * spinning on this lock, it may win the race
+                                 * and cause alloc_contig_range() to fail...
+                                 */
+                                spin_unlock_irqrestore(&z->lock, flags);
+                                ret = __alloc_gigantic_page(pfn, nr_pages);
+                                if (!ret)
+                                        return pfn_to_page(pfn);
+                                spin_lock_irqsave(&z->lock, flags);
+                        }
+                        pfn += nr_pages;
+                }
+
+                spin_unlock_irqrestore(&z->lock, flags);
+        }
+
+        return NULL;
+}
+
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
+static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+
+static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
+{
+        struct page *page;
+
+        page = alloc_gigantic_page(nid, huge_page_order(h));
+        if (page) {
+                prep_compound_gigantic_page(page, huge_page_order(h));
+                prep_new_huge_page(h, page, nid);
+        }
+
+        return page;
+}
+
+static int alloc_fresh_gigantic_page(struct hstate *h,
+                                nodemask_t *nodes_allowed)
+{
+        struct page *page = NULL;
+        int nr_nodes, node;
+
+        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+                page = alloc_fresh_gigantic_page_node(h, node);
+                if (page)
+                        return 1;
+        }
+
+        return 0;
+}
+
+static inline bool gigantic_page_supported(void) { return true; }
+#else
+static inline bool gigantic_page_supported(void) { return false; }
+static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void destroy_compound_gigantic_page(struct page *page,
+                                                unsigned long order) { }
+static inline int alloc_fresh_gigantic_page(struct hstate *h,
+                                        nodemask_t *nodes_allowed) { return 0; }
+#endif
+
 static void update_and_free_page(struct hstate *h, struct page *page)
 {
         int i;
 
-        VM_BUG_ON(h->order >= MAX_ORDER);
+        if (hstate_is_gigantic(h) && !gigantic_page_supported())
+                return;
 
         h->nr_huge_pages--;
         h->nr_huge_pages_node[page_to_nid(page)]--;
         for (i = 0; i < pages_per_huge_page(h); i++) {
                 page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
                                 1 << PG_referenced | 1 << PG_dirty |
-                                1 << PG_active | 1 << PG_reserved |
-                                1 << PG_private | 1 << PG_writeback);
+                                1 << PG_active | 1 << PG_private |
+                                1 << PG_writeback);
         }
         VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
         set_compound_page_dtor(page, NULL);
         set_page_refcounted(page);
-        arch_release_hugepage(page);
-        __free_pages(page, huge_page_order(h));
+        if (hstate_is_gigantic(h)) {
+                destroy_compound_gigantic_page(page, huge_page_order(h));
+                free_gigantic_page(page, huge_page_order(h));
+        } else {
+                arch_release_hugepage(page);
+                __free_pages(page, huge_page_order(h));
+        }
 }
 
 struct hstate *size_to_hstate(unsigned long size)
@@ -664,7 +881,7 @@ static void free_huge_page(struct page *page)
         if (restore_reserve)
                 h->resv_huge_pages++;
 
-        if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
+        if (h->surplus_huge_pages_node[nid]) {
                 /* remove the page from active list */
                 list_del(&page->lru);
                 update_and_free_page(h, page);
@@ -690,8 +907,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
         put_page(page); /* free it into the hugepage allocator */
 }
 
-static void __init prep_compound_gigantic_page(struct page *page,
-                unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 {
         int i;
         int nr_pages = 1 << order;
@@ -769,9 +985,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
         struct page *page;
 
-        if (h->order >= MAX_ORDER)
-                return NULL;
-
         page = alloc_pages_exact_node(nid,
                 htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                                                 __GFP_REPEAT|__GFP_NOWARN,
@@ -787,79 +1000,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
         return page;
 }
 
-/*
- * common helper functions for hstate_next_node_to_{alloc|free}.
- * We may have allocated or freed a huge page based on a different
- * nodes_allowed previously, so h->next_node_to_{alloc|free} might
- * be outside of *nodes_allowed.  Ensure that we use an allowed
- * node for alloc or free.
- */
-static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
-        nid = next_node(nid, *nodes_allowed);
-        if (nid == MAX_NUMNODES)
-                nid = first_node(*nodes_allowed);
-        VM_BUG_ON(nid >= MAX_NUMNODES);
-
-        return nid;
-}
-
-static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
-        if (!node_isset(nid, *nodes_allowed))
-                nid = next_node_allowed(nid, nodes_allowed);
-        return nid;
-}
-
-/*
- * returns the previously saved node ["this node"] from which to
- * allocate a persistent huge page for the pool and advance the
- * next node from which to allocate, handling wrap at end of node
- * mask.
- */
-static int hstate_next_node_to_alloc(struct hstate *h,
-                                        nodemask_t *nodes_allowed)
-{
-        int nid;
-
-        VM_BUG_ON(!nodes_allowed);
-
-        nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
-        h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
-
-        return nid;
-}
-
-/*
- * helper for free_pool_huge_page() - return the previously saved
- * node ["this node"] from which to free a huge page.  Advance the
- * next node id whether or not we find a free huge page to free so
- * that the next attempt to free addresses the next node.
- */
-static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
-{
-        int nid;
-
-        VM_BUG_ON(!nodes_allowed);
-
-        nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
-        h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
-
-        return nid;
-}
-
-#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
-        for (nr_nodes = nodes_weight(*mask);                            \
-                nr_nodes > 0 &&                                         \
-                ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
-                nr_nodes--)
-
-#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)            \
-        for (nr_nodes = nodes_weight(*mask);                            \
-                nr_nodes > 0 &&                                         \
-                ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
-                nr_nodes--)
-
 static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
         struct page *page;
@@ -963,7 +1103,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
         struct page *page;
         unsigned int r_nid;
 
-        if (h->order >= MAX_ORDER)
+        if (hstate_is_gigantic(h))
                 return NULL;
 
         /*
@@ -1156,7 +1296,7 @@ static void return_unused_surplus_pages(struct hstate *h,
         h->resv_huge_pages -= unused_resv_pages;
 
         /* Cannot return gigantic pages currently */
-        if (h->order >= MAX_ORDER)
+        if (hstate_is_gigantic(h))
                 return;
 
         nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
@@ -1246,24 +1386,17 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
                 return ERR_PTR(-ENOSPC);
 
         ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
-        if (ret) {
-                if (chg || avoid_reserve)
-                        hugepage_subpool_put_pages(spool, 1);
-                return ERR_PTR(-ENOSPC);
-        }
+        if (ret)
+                goto out_subpool_put;
+
         spin_lock(&hugetlb_lock);
         page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
         if (!page) {
                 spin_unlock(&hugetlb_lock);
                 page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
-                if (!page) {
-                        hugetlb_cgroup_uncharge_cgroup(idx,
-                                                       pages_per_huge_page(h),
-                                                       h_cg);
-                        if (chg || avoid_reserve)
-                                hugepage_subpool_put_pages(spool, 1);
-                        return ERR_PTR(-ENOSPC);
-                }
+                if (!page)
+                        goto out_uncharge_cgroup;
+
                 spin_lock(&hugetlb_lock);
                 list_move(&page->lru, &h->hugepage_activelist);
                 /* Fall through */
@@ -1275,6 +1408,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 
         vma_commit_reservation(h, vma, addr);
         return page;
+
+out_uncharge_cgroup:
+        hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
+out_subpool_put:
+        if (chg || avoid_reserve)
+                hugepage_subpool_put_pages(spool, 1);
+        return ERR_PTR(-ENOSPC);
 }
 
 /*
@@ -1356,7 +1496,7 @@ static void __init gather_bootmem_prealloc(void)
                  * fix confusing memory reports from free(1) and another
                  * side-effects, like CommitLimit going negative.
                  */
-                if (h->order > (MAX_ORDER - 1))
+                if (hstate_is_gigantic(h))
                         adjust_managed_page_count(page, 1 << h->order);
         }
 }
@@ -1366,7 +1506,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
         unsigned long i;
 
         for (i = 0; i < h->max_huge_pages; ++i) {
-                if (h->order >= MAX_ORDER) {
+                if (hstate_is_gigantic(h)) {
                         if (!alloc_bootmem_huge_page(h))
                                 break;
                 } else if (!alloc_fresh_huge_page(h,
@@ -1382,7 +1522,7 @@ static void __init hugetlb_init_hstates(void)
 
         for_each_hstate(h) {
                 /* oversize hugepages were init'ed in early boot */
-                if (h->order < MAX_ORDER)
+                if (!hstate_is_gigantic(h))
                         hugetlb_hstate_alloc_pages(h);
         }
 }
@@ -1416,7 +1556,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 {
         int i;
 
-        if (h->order >= MAX_ORDER)
+        if (hstate_is_gigantic(h))
                 return;
 
         for_each_node_mask(i, *nodes_allowed) {
@@ -1479,7 +1619,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 {
         unsigned long min_count, ret;
 
-        if (h->order >= MAX_ORDER)
+        if (hstate_is_gigantic(h) && !gigantic_page_supported())
                 return h->max_huge_pages;
 
         /*
@@ -1506,7 +1646,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                  * and reducing the surplus.
                  */
                 spin_unlock(&hugetlb_lock);
-                ret = alloc_fresh_huge_page(h, nodes_allowed);
+                if (hstate_is_gigantic(h))
+                        ret = alloc_fresh_gigantic_page(h, nodes_allowed);
+                else
+                        ret = alloc_fresh_huge_page(h, nodes_allowed);
                 spin_lock(&hugetlb_lock);
                 if (!ret)
                         goto out;
@@ -1606,7 +1749,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
                 goto out;
 
         h = kobj_to_hstate(kobj, &nid);
-        if (h->order >= MAX_ORDER) {
+        if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                 err = -EINVAL;
                 goto out;
         }
@@ -1689,7 +1832,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
         unsigned long input;
         struct hstate *h = kobj_to_hstate(kobj, NULL);
 
-        if (h->order >= MAX_ORDER)
+        if (hstate_is_gigantic(h))
                 return -EINVAL;
 
         err = kstrtoul(buf, 10, &input);
@@ -2113,7 +2256,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 
         tmp = h->max_huge_pages;
 
-        if (write && h->order >= MAX_ORDER)
+        if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
                 return -EINVAL;
 
         table->data = &tmp;
@@ -2169,7 +2312,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
         tmp = h->nr_overcommit_huge_pages;
 
-        if (write && h->order >= MAX_ORDER)
+        if (write && hstate_is_gigantic(h))
                 return -EINVAL;
 
         table->data = &tmp;
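Note: as a rough illustration of the node round-robin used by the helpers this diff moves up in the file (next_node_allowed(), hstate_next_node_to_alloc(), for_each_node_mask_to_alloc()), the standalone C sketch below reproduces the same wrap-around selection over a plain bitmask in userspace. It is not kernel code: MAX_NODES, struct pool, next_allowed() and pick_node() are invented stand-ins for MAX_NUMNODES, struct hstate, next_node_allowed() and hstate_next_node_to_alloc().

```c
#include <stdio.h>

#define MAX_NODES 8                        /* stand-in for MAX_NUMNODES */

/* Return the next node set in 'mask' after 'nid', wrapping at the end,
 * mirroring what next_node_allowed() does with a nodemask_t.
 * Assumes 'mask' is non-empty (the kernel VM_BUG_()s on that case). */
static int next_allowed(int nid, unsigned int mask)
{
        do {
                nid = (nid + 1) % MAX_NODES;
        } while (!(mask & (1u << nid)));
        return nid;
}

/* Per-pool state: the node the next allocation should come from,
 * analogous to h->next_nid_to_alloc in struct hstate. */
struct pool {
        int next_nid;
};

/* Return the node to use now and advance the cursor, like
 * hstate_next_node_to_alloc(): successive calls hand out one allowed
 * node per call, round-robin over the mask. */
static int pick_node(struct pool *p, unsigned int allowed)
{
        int nid = p->next_nid;

        if (!(allowed & (1u << nid)))      /* cursor left the allowed mask */
                nid = next_allowed(nid, allowed);
        p->next_nid = next_allowed(nid, allowed);
        return nid;
}

int main(void)
{
        struct pool p = { .next_nid = 0 };
        unsigned int allowed = 0x0b;       /* nodes 0, 1 and 3 allowed */

        /* Eight "allocations" cycle through nodes 0, 1, 3, 0, 1, 3, ... */
        for (int i = 0; i < 8; i++)
                printf("allocation %d -> node %d\n", i, pick_node(&p, allowed));
        return 0;
}
```

Running the sketch prints allocations cycling over nodes 0, 1 and 3, which is the behaviour the hugetlb pool code relies on to spread persistent huge pages evenly across the allowed NUMA nodes.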