summaryrefslogtreecommitdiff
path: root/fs/hugetlbfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--fs/hugetlbfs/inode.c224
1 files changed, 116 insertions, 108 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 99c7477cee5c..f7a5b5124d8a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -11,7 +11,6 @@
#include <linux/thread_info.h>
#include <asm/current.h>
-#include <linux/sched/signal.h> /* remove ASAP */
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -40,7 +39,6 @@
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
-static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -108,16 +106,6 @@ static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
}
#endif
-static void huge_pagevec_release(struct pagevec *pvec)
-{
- int i;
-
- for (i = 0; i < pagevec_count(pvec); ++i)
- put_page(pvec->pages[i]);
-
- pagevec_reinit(pvec);
-}
-
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
@@ -195,7 +183,6 @@ out:
* Called under mmap_write_lock(mm).
*/
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -206,7 +193,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
- info.high_limit = TASK_SIZE;
+ info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
@@ -222,7 +209,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
@@ -237,20 +224,22 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
- info.high_limit = TASK_SIZE;
+ info.high_limit = arch_get_mmap_end(addr, len, flags);
addr = vm_unmapped_area(&info);
}
return addr;
}
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -266,7 +255,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
+ if (mmap_end - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -282,41 +271,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
}
-#endif
-static size_t
-hugetlbfs_read_actor(struct page *page, unsigned long offset,
- struct iov_iter *to, unsigned long size)
+#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
- size_t copied = 0;
- int i, chunksize;
-
- /* Find which 4k chunk and offset with in that chunk */
- i = offset >> PAGE_SHIFT;
- offset = offset & ~PAGE_MASK;
-
- while (size) {
- size_t n;
- chunksize = PAGE_SIZE;
- if (offset)
- chunksize -= offset;
- if (chunksize > size)
- chunksize = size;
- n = copy_page_to_iter(&page[i], offset, chunksize, to);
- copied += n;
- if (n != chunksize)
- return copied;
- offset = 0;
- size -= chunksize;
- i++;
- }
- return copied;
+ return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
}
+#endif
/*
* Support for read() - Find the page attached to f_mapping and copy out the
- * data. Its *very* similar to generic_file_buffered_read(), we can't use that
- * since it has PAGE_SIZE assumptions.
+ * data. This provides functionality similar to filemap_read().
*/
static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
@@ -363,7 +331,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
/*
* We have the page, copy it to user space buffer.
*/
- copied = hugetlbfs_read_actor(page, offset, to, nr);
+ copied = copy_page_to_iter(page, offset, nr, to);
put_page(page);
}
offset += copied;
@@ -382,7 +350,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
static int hugetlbfs_write_begin(struct file *file,
struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
return -EINVAL;
@@ -404,7 +372,8 @@ static void remove_huge_page(struct page *page)
}
static void
-hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
+ zap_flags_t zap_flags)
{
struct vm_area_struct *vma;
@@ -438,7 +407,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
}
unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
- NULL);
+ NULL, zap_flags);
}
}
@@ -469,25 +438,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> huge_page_shift(h);
const pgoff_t end = lend >> huge_page_shift(h);
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
next = start;
- while (next < end) {
- /*
- * When no more pages are found, we are done.
- */
- if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); ++i) {
- struct page *page = pvec.pages[i];
+ while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+ struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = page->index;
+ index = folio->index;
if (!truncate_op) {
/*
* Only need to hold the fault mutex in the
@@ -500,15 +463,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
/*
- * If page is mapped, it was faulted in after being
+ * If folio is mapped, it was faulted in after being
* unmapped in caller. Unmap (again) now after taking
* the fault mutex. The mutex will prevent faults
- * until we finish removing the page.
+ * until we finish removing the folio.
*
* This race can only happen in the hole punch case.
* Getting here in a truncate operation is a bug.
*/
- if (unlikely(page_mapped(page))) {
+ if (unlikely(folio_mapped(folio))) {
BUG_ON(truncate_op);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -516,11 +479,12 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vmdelete_list(&mapping->i_mmap,
index * pages_per_huge_page(h),
- (index + 1) * pages_per_huge_page(h));
+ (index + 1) * pages_per_huge_page(h),
+ ZAP_FLAG_DROP_MARKER);
i_mmap_unlock_write(mapping);
}
- lock_page(page);
+ folio_lock(folio);
/*
* We must free the huge page and remove from page
* cache (remove_huge_page) BEFORE removing the
@@ -530,8 +494,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
* the subpool and global reserve usage count can need
* to be adjusted.
*/
- VM_BUG_ON(HPageRestoreReserve(page));
- remove_huge_page(page);
+ VM_BUG_ON(HPageRestoreReserve(&folio->page));
+ remove_huge_page(&folio->page);
freed++;
if (!truncate_op) {
if (unlikely(hugetlb_unreserve_pages(inode,
@@ -539,11 +503,11 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
hugetlb_fix_reserve_counts(inode);
}
- unlock_page(page);
+ folio_unlock(folio);
if (!truncate_op)
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
- huge_pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}
@@ -582,46 +546,85 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_mmap_lock_write(mapping);
i_size_write(inode, offset);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
- hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
+ hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
+ ZAP_FLAG_DROP_MARKER);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, offset, LLONG_MAX);
}
+static void hugetlbfs_zero_partial_page(struct hstate *h,
+ struct address_space *mapping,
+ loff_t start,
+ loff_t end)
+{
+ pgoff_t idx = start >> huge_page_shift(h);
+ struct folio *folio;
+
+ folio = filemap_lock_folio(mapping, idx);
+ if (!folio)
+ return;
+
+ start = start & ~huge_page_mask(h);
+ end = end & ~huge_page_mask(h);
+ if (!end)
+ end = huge_page_size(h);
+
+ folio_zero_segment(folio, (size_t)start, (size_t)end);
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_inode(inode);
loff_t hpage_size = huge_page_size(h);
loff_t hole_start, hole_end;
/*
- * For hole punch round up the beginning offset of the hole and
- * round down the end.
+ * hole_start and hole_end indicate the full pages within the hole.
*/
hole_start = round_up(offset, hpage_size);
hole_end = round_down(offset + len, hpage_size);
- if (hole_end > hole_start) {
- struct address_space *mapping = inode->i_mapping;
- struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ inode_lock(inode);
- inode_lock(inode);
+ /* protected by i_rwsem */
+ if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+ inode_unlock(inode);
+ return -EPERM;
+ }
- /* protected by i_rwsem */
- if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
- inode_unlock(inode);
- return -EPERM;
- }
+ i_mmap_lock_write(mapping);
- i_mmap_lock_write(mapping);
+ /* If range starts before first full page, zero partial page. */
+ if (offset < hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ offset, min(offset + len, hole_start));
+
+ /* Unmap users of full pages in the hole. */
+ if (hole_end > hole_start) {
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
- hole_start >> PAGE_SHIFT,
- hole_end >> PAGE_SHIFT);
- i_mmap_unlock_write(mapping);
- remove_inode_hugepages(inode, hole_start, hole_end);
- inode_unlock(inode);
+ hole_start >> PAGE_SHIFT,
+ hole_end >> PAGE_SHIFT, 0);
}
+ /* If range extends beyond last full page, zero partial page. */
+ if ((offset + len) > hole_end && (offset + len) > hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ hole_end, offset + len);
+
+ i_mmap_unlock_write(mapping);
+
+ /* Remove full pages from the file. */
+ if (hole_end > hole_start)
+ remove_inode_hugepages(inode, hole_start, hole_end);
+
+ inode_unlock(inode);
+
return 0;
}
@@ -746,7 +749,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
SetHPageMigratable(page);
/*
- * unlock_page because locked by add_to_page_cache()
+ * unlock_page because locked by huge_add_to_page_cache()
* put_page() due to reference from alloc_huge_page()
*/
unlock_page(page);
@@ -957,28 +960,33 @@ static int hugetlbfs_symlink(struct user_namespace *mnt_userns,
return error;
}
-static int hugetlbfs_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page,
+#ifdef CONFIG_MIGRATION
+static int hugetlbfs_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
enum migrate_mode mode)
{
int rc;
- rc = migrate_huge_page_move_mapping(mapping, newpage, page);
+ rc = migrate_huge_page_move_mapping(mapping, dst, src);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- if (hugetlb_page_subpool(page)) {
- hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
- hugetlb_set_page_subpool(page, NULL);
+ if (hugetlb_page_subpool(&src->page)) {
+ hugetlb_set_page_subpool(&dst->page,
+ hugetlb_page_subpool(&src->page));
+ hugetlb_set_page_subpool(&src->page, NULL);
}
if (mode != MIGRATE_SYNC_NO_COPY)
- migrate_page_copy(newpage, page);
+ folio_migrate_copy(dst, src);
else
- migrate_page_states(newpage, page);
+ folio_migrate_flags(dst, src);
return MIGRATEPAGE_SUCCESS;
}
+#else
+#define hugetlbfs_migrate_folio NULL
+#endif
static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct page *page)
@@ -1042,17 +1050,17 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bsize = huge_page_size(h);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
- /* If no limits set, just report 0 for max/free/used
+ /* If no limits set, just report 0 or -1 for max/free/used
* blocks, like simple_statfs() */
if (sbinfo->spool) {
long free_pages;
- spin_lock(&sbinfo->spool->lock);
+ spin_lock_irq(&sbinfo->spool->lock);
buf->f_blocks = sbinfo->spool->max_hpages;
free_pages = sbinfo->spool->max_hpages
- sbinfo->spool->used_hpages;
buf->f_bavail = buf->f_bfree = free_pages;
- spin_unlock(&sbinfo->spool->lock);
+ spin_unlock_irq(&sbinfo->spool->lock);
buf->f_files = sbinfo->max_inodes;
buf->f_ffree = sbinfo->free_inodes;
}
@@ -1145,7 +1153,7 @@ static const struct address_space_operations hugetlbfs_aops = {
.write_begin = hugetlbfs_write_begin,
.write_end = hugetlbfs_write_end,
.dirty_folio = noop_dirty_folio,
- .migratepage = hugetlbfs_migrate_page,
+ .migrate_folio = hugetlbfs_migrate_folio,
.error_remove_page = hugetlbfs_error_remove_page,
};
@@ -1269,7 +1277,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
ps = memparse(param->string, &rest);
ctx->hstate = size_to_hstate(ps);
if (!ctx->hstate) {
- pr_err("Unsupported page size %lu MB\n", ps >> 20);
+ pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
return -EINVAL;
}
return 0;
@@ -1345,7 +1353,7 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
/*
* Allocate and initialize subpool if maximum or minimum size is
* specified. Any needed reservations (for minimum size) are taken
- * taken when the subpool is created.
+ * when the subpool is created.
*/
if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
sbinfo->spool = hugepage_new_subpool(ctx->hstate,
@@ -1515,7 +1523,7 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
}
if (IS_ERR(mnt))
pr_err("Cannot mount internal hugetlbfs for page size %luK",
- huge_page_size(h) >> 10);
+ huge_page_size(h) / SZ_1K);
return mnt;
}