summaryrefslogtreecommitdiff
path: root/fs/hugetlbfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs')
-rw-r--r--fs/hugetlbfs/inode.c107
1 files changed, 89 insertions, 18 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aff8642f0c2e..b5c109703daa 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -38,6 +38,7 @@
#include <linux/uio.h>
#include <linux/uaccess.h>
+#include <linux/sched/mm.h>
static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
@@ -139,7 +140,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
* already been checked by prepare_hugepage_range. If you add
* any error returns here, do so after setting VM_HUGETLB, so
* is_vm_hugetlb_page tests below unmap_region go the right
- * way when do_mmap_pgoff unwinds (may be important on powerpc
+ * way when do_mmap unwinds (may be important on powerpc
* and ia64).
*/
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
@@ -186,18 +187,65 @@ out:
}
/*
- * Called under down_write(mmap_sem).
+ * Called under mmap_write_lock(mm).
*/
#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
+hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long
+hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (unlikely(offset_in_page(addr))) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -218,13 +266,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return addr;
}
- info.flags = 0;
- info.length = len;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
- info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
- return vm_unmapped_area(&info);
+ /*
+ * Use mm->get_unmapped_area value as a hint to use topdown routine.
+ * If architectures have special needs, they should define their own
+ * version of hugetlb_get_unmapped_area.
+ */
+ if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
+ return hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
}
#endif
@@ -393,10 +444,9 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
* In this case, we first scan the range and release found pages.
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
* maps and global counts. Page faults can not race with truncation
- * in this routine. hugetlb_no_page() prevents page faults in the
- * truncated range. It checks i_size before allocation, and again after
- * with the page table lock for the page held. The same lock must be
- * acquired to unmap a page.
+ * in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
+ * page faults in the truncated range by checking i_size. i_size is
+ * modified while holding i_mmap_rwsem.
* hole punch is indicated if end is not LLONG_MAX
* In the hole punch case we scan the range and release found pages.
* Only when releasing a page is the associated region/reserv map
@@ -436,7 +486,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
index = page->index;
hash = hugetlb_fault_mutex_hash(mapping, index);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ if (!truncate_op) {
+ /*
+ * Only need to hold the fault mutex in the
+ * hole punch case. This prevents races with
+ * page faults. Races are not possible in the
+ * case of truncation.
+ */
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ }
/*
* If page is mapped, it was faulted in after being
@@ -450,7 +508,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
if (unlikely(page_mapped(page))) {
BUG_ON(truncate_op);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_lock_write(mapping);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vmdelete_list(&mapping->i_mmap,
index * pages_per_huge_page(h),
(index + 1) * pages_per_huge_page(h));
@@ -477,7 +537,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
unlock_page(page);
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ if (!truncate_op)
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
huge_pagevec_release(&pvec);
cond_resched();
@@ -515,8 +576,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
BUG_ON(offset & ~huge_page_mask(h));
pgoff = offset >> PAGE_SHIFT;
- i_size_write(inode, offset);
i_mmap_lock_write(mapping);
+ i_size_write(inode, offset);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
i_mmap_unlock_write(mapping);
@@ -638,7 +699,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
- /* mutex taken here, fault path and hole punch */
+ /*
+ * fault mutex taken here, protects against fault path
+ * and hole punch. inode_lock previously taken protects
+ * against truncation.
+ */
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -1299,6 +1364,12 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
+
+ /*
+ * Due to the special and limited functionality of hugetlbfs, it does
+ * not work well as a stacking filesystem.
+ */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
if (!sb->s_root)
goto out_free;