summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2006-01-06 11:10:46 +0300
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-06 19:33:23 +0300
commit5da7ca86078964cbfe6c83efc1205904587706fe (patch)
treea64a7824e90b42d6fdd71e6cb652362beb8983a1 /mm
parent96df9333c94d7d5aeceb21f6c5e7ae8ff34753cf (diff)
downloadlinux-5da7ca86078964cbfe6c83efc1205904587706fe.tar.xz
[PATCH] Add NUMA policy support for huge pages.
The huge_zonelist() function in the memory policy layer provides an list of zones ordered by NUMA distance. The hugetlb layer will walk that list looking for a zone that has available huge pages but is also in the nodeset of the current cpuset. This patch does not contain the folding of find_or_alloc_huge_page() that was controversial in the earlier discussion. Signed-off-by: Christoph Lameter <clameter@sgi.com> Cc: Andi Kleen <ak@muc.de> Acked-by: William Lee Irwin III <wli@holomorphy.com> Cc: Adam Litke <agl@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb.c24
-rw-r--r--mm/mempolicy.c39
2 files changed, 44 insertions, 19 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63462f0..eb405565949d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page)
free_huge_pages_node[nid]++;
}
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+ unsigned long address)
{
int nid = numa_node_id();
struct page *page = NULL;
- struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
+ struct zonelist *zonelist = huge_zonelist(vma, address);
struct zone **z;
for (z = zonelist->zones; *z; z++) {
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page)
spin_unlock(&hugetlb_lock);
}
-struct page *alloc_huge_page(void)
+struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
{
struct page *page;
int i;
spin_lock(&hugetlb_lock);
- page = dequeue_huge_page();
+ page = dequeue_huge_page(vma, addr);
if (!page) {
spin_unlock(&hugetlb_lock);
return NULL;
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
spin_lock(&hugetlb_lock);
try_to_free_low(count);
while (count < nr_huge_pages) {
- struct page *page = dequeue_huge_page();
+ struct page *page = dequeue_huge_page(NULL, 0);
if (!page)
break;
update_and_free_page(page);
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
flush_tlb_range(vma, start, end);
}
-static struct page *find_or_alloc_huge_page(struct address_space *mapping,
- unsigned long idx, int shared)
+static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, struct address_space *mapping,
+ unsigned long idx, int shared)
{
struct page *page;
int err;
@@ -378,7 +382,7 @@ retry:
if (hugetlb_get_quota(mapping))
goto out;
- page = alloc_huge_page();
+ page = alloc_huge_page(vma, addr);
if (!page) {
hugetlb_put_quota(mapping);
goto out;
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
}
page_cache_get(old_page);
- new_page = alloc_huge_page();
+ new_page = alloc_huge_page(vma, address);
if (!new_page) {
page_cache_release(old_page);
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
* Use page lock to guard against racing truncation
* before we get page_table_lock.
*/
- page = find_or_alloc_huge_page(mapping, idx,
+ page = find_or_alloc_huge_page(vma, address, mapping, idx,
vma->vm_flags & VM_SHARED);
if (!page)
goto out;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72f402cc9c9a..45c51ac63443 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
return nid;
}
+/* Determine a node number for interleave */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+ struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+ if (vma) {
+ unsigned long off;
+
+ off = vma->vm_pgoff;
+ off += (addr - vma->vm_start) >> shift;
+ return offset_il_node(pol, vma, off);
+ } else
+ return interleave_nodes(pol);
+}
+
+/* Return a zonelist suitable for a huge page allocation. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+ if (pol->policy == MPOL_INTERLEAVE) {
+ unsigned nid;
+
+ nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+ return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+ }
+ return zonelist_policy(GFP_HIGHUSER, pol);
+}
+
/* Allocate a page in interleaved policy.
Own path because it needs to do special accounting. */
static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
unsigned nid;
- if (vma) {
- unsigned long off;
- off = vma->vm_pgoff;
- off += (addr - vma->vm_start) >> PAGE_SHIFT;
- nid = offset_il_node(pol, vma, off);
- } else {
- /* fall back to process interleaving */
- nid = interleave_nodes(pol);
- }
+
+ nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
return alloc_page_interleave(gfp, 0, nid);
}
return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));