summaryrefslogtreecommitdiff
path: root/fs/hugetlbfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--fs/hugetlbfs/inode.c177
1 files changed, 69 insertions, 108 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ac519515ef6c..c0856585bb63 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -40,7 +40,7 @@
#include <linux/sched/mm.h>
static const struct address_space_operations hugetlbfs_aops;
-const struct file_operations hugetlbfs_file_operations;
+static const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;
@@ -73,39 +73,16 @@ enum hugetlb_param {
};
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
- fsparam_u32 ("gid", Opt_gid),
+ fsparam_gid ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
- fsparam_u32 ("uid", Opt_uid),
+ fsparam_uid ("uid", Opt_uid),
{}
};
-#ifdef CONFIG_NUMA
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
- vma->vm_policy = mpol_shared_policy_lookup(&HUGETLBFS_I(inode)->policy,
- index);
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
- mpol_cond_put(vma->vm_policy);
-}
-#else
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
-}
-#endif
-
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
@@ -136,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
vma->vm_ops = &hugetlb_vm_ops;
- ret = seal_check_future_write(info->seals, vma);
+ ret = seal_check_write(info->seals, vma);
if (ret)
return ret;
@@ -199,14 +176,12 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -215,14 +190,13 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -248,13 +222,13 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct hstate *h = hstate_file(file);
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
- if (len > TASK_SIZE)
+ if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED) {
@@ -265,18 +239,19 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
- vma = find_vma(mm, addr);
- if (mmap_end - len >= addr &&
- (!vma || addr + len <= vm_start_gap(vma)))
+ vma = find_vma_prev(mm, addr, &prev);
+ if (mmap_end - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)) &&
+ (!prev || addr >= vm_end_gap(prev)))
return addr;
}
/*
- * Use mm->get_unmapped_area value as a hint to use topdown routine.
+ * Use MMF_TOPDOWN flag as a hint to use topdown routine.
* If architectures have special needs, they should define their own
* version of hugetlb_get_unmapped_area.
*/
- if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
+ if (test_bit(MMF_TOPDOWN, &mm->flags))
return hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
@@ -345,7 +320,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
ssize_t retval = 0;
while (iov_iter_count(to)) {
- struct page *page;
+ struct folio *folio;
size_t nr, copied, want;
/* nr is the maximum number of bytes to copy from this page */
@@ -363,18 +338,18 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
nr = nr - offset;
- /* Find the page */
- page = find_lock_page(mapping, index);
- if (unlikely(page == NULL)) {
+ /* Find the folio */
+ folio = filemap_lock_hugetlb_folio(h, mapping, index);
+ if (IS_ERR(folio)) {
/*
* We have a HOLE, zero out the user-buffer for the
* length of the hole or request.
*/
copied = iov_iter_zero(nr, to);
} else {
- unlock_page(page);
+ folio_unlock(folio);
- if (!PageHWPoison(page))
+ if (!folio_test_hwpoison(folio))
want = nr;
else {
/*
@@ -382,19 +357,19 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
* touching the 1st raw HWPOISON subpage after
* offset.
*/
- want = adjust_range_hwpoison(page, offset, nr);
+ want = adjust_range_hwpoison(&folio->page, offset, nr);
if (want == 0) {
- put_page(page);
+ folio_put(folio);
retval = -EIO;
break;
}
}
/*
- * We have the page, copy it to user space buffer.
+ * We have the folio, copy it to user space buffer.
*/
- copied = copy_page_to_iter(page, offset, want, to);
- put_page(page);
+ copied = copy_folio_to_iter(folio, offset, want, to);
+ folio_put(folio);
}
offset += copied;
retval += copied;
@@ -413,14 +388,14 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
static int hugetlbfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
return -EINVAL;
}
static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
BUG();
return -EINVAL;
@@ -448,7 +423,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
if (!ptep)
return false;
- pte = huge_ptep_get(ptep);
+ pte = huge_ptep_get(vma->vm_mm, addr, ptep);
if (huge_pte_none(pte) || !pte_present(pte))
return false;
@@ -672,21 +647,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
{
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
- const pgoff_t start = lstart >> huge_page_shift(h);
- const pgoff_t end = lend >> huge_page_shift(h);
+ const pgoff_t end = lend >> PAGE_SHIFT;
struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
folio_batch_init(&fbatch);
- next = start;
+ next = lstart >> PAGE_SHIFT;
while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index;
+ index = folio->index >> huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -704,7 +678,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
if (truncate_op)
- (void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
+ (void)hugetlb_unreserve_pages(inode,
+ lstart >> huge_page_shift(h),
+ LONG_MAX, freed);
}
static void hugetlbfs_evict_inode(struct inode *inode)
@@ -719,7 +695,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
* at inode creation time. If this is a device special inode,
* i_mapping may not point to the original address space.
*/
- resv_map = (struct resv_map *)(&inode->i_data)->private_data;
+ resv_map = (struct resv_map *)(&inode->i_data)->i_private_data;
/* Only regular and link inodes have associated reserve maps */
if (resv_map)
resv_map_release(&resv_map->refs);
@@ -752,7 +728,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
pgoff_t idx = start >> huge_page_shift(h);
struct folio *folio;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio))
return;
@@ -863,8 +839,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/*
* Initialize a pseudo vma as this is required by the huge page
- * allocation routines. If NUMA is configured, use page index
- * as input to create an allocation policy.
+ * allocation routines.
*/
vma_init(&pseudo_vma, mm);
vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
@@ -897,7 +872,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- folio = filemap_get_folio(mapping, index);
+ folio = filemap_get_folio(mapping, index << huge_page_order(h));
if (!IS_ERR(folio)) {
folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -912,15 +887,13 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
- hugetlb_set_vma_policy(&pseudo_vma, inode, index);
folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
- hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
error = PTR_ERR(folio);
goto out;
}
- clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+ folio_zero_user(folio, addr);
__folio_mark_uptodate(folio);
error = hugetlb_add_to_page_cache(folio, mapping, index);
if (unlikely(error)) {
@@ -958,7 +931,7 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
unsigned int ia_valid = attr->ia_valid;
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
- error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
+ error = setattr_prepare(idmap, dentry, attr);
if (error)
return error;
@@ -975,7 +948,7 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
hugetlb_vmtruncate(inode, newsize);
}
- setattr_copy(&nop_mnt_idmap, inode, attr);
+ setattr_copy(idmap, inode, attr);
mark_inode_dirty(inode);
return 0;
}
@@ -991,7 +964,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
inode->i_mode = S_IFDIR | ctx->mode;
inode->i_uid = ctx->uid;
inode->i_gid = ctx->gid;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -1010,6 +983,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
static struct lock_class_key hugetlbfs_i_mmap_rwsem_key;
static struct inode *hugetlbfs_get_inode(struct super_block *sb,
+ struct mnt_idmap *idmap,
struct inode *dir,
umode_t mode, dev_t dev)
{
@@ -1031,12 +1005,12 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
inode->i_ino = get_next_ino();
- inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
+ inode_init_owner(idmap, inode, dir, mode);
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
- inode->i_mapping->private_data = resv_map;
+ simple_inode_init_ts(inode);
+ inode->i_mapping->i_private_data = resv_map;
info->seals = F_SEAL_SEAL;
switch (mode & S_IFMT) {
default:
@@ -1075,10 +1049,10 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
{
struct inode *inode;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, dev);
if (!inode)
return -ENOSPC;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
d_instantiate(dentry, inode);
dget(dentry);/* Extra count - pin the dentry in core */
return 0;
@@ -1087,7 +1061,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
static int hugetlbfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
- int retval = hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry,
+ int retval = hugetlbfs_mknod(idmap, dir, dentry,
mode | S_IFDIR, 0);
if (!retval)
inc_nlink(dir);
@@ -1098,7 +1072,7 @@ static int hugetlbfs_create(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
- return hugetlbfs_mknod(&nop_mnt_idmap, dir, dentry, mode | S_IFREG, 0);
+ return hugetlbfs_mknod(idmap, dir, dentry, mode | S_IFREG, 0);
}
static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
@@ -1107,10 +1081,10 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
{
struct inode *inode;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode | S_IFREG, 0);
if (!inode)
return -ENOSPC;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
d_tmpfile(file, inode);
return finish_open_simple(file, 0);
}
@@ -1119,10 +1093,11 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry,
const char *symname)
{
+ const umode_t mode = S_IFLNK|S_IRWXUGO;
struct inode *inode;
int error = -ENOSPC;
- inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
+ inode = hugetlbfs_get_inode(dir->i_sb, idmap, dir, mode, 0);
if (inode) {
int l = strlen(symname)+1;
error = page_symlink(inode, symname, l);
@@ -1132,7 +1107,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
} else
iput(inode);
}
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
return error;
}
@@ -1154,10 +1129,7 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
hugetlb_set_folio_subpool(src, NULL);
}
- if (mode != MIGRATE_SYNC_NO_COPY)
- folio_migrate_copy(dst, src);
- else
- folio_migrate_flags(dst, src);
+ folio_migrate_flags(dst, src);
return MIGRATEPAGE_SUCCESS;
}
@@ -1165,8 +1137,8 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
#define hugetlbfs_migrate_folio NULL
#endif
-static int hugetlbfs_error_remove_page(struct address_space *mapping,
- struct page *page)
+static int hugetlbfs_error_remove_folio(struct address_space *mapping,
+ struct folio *folio)
{
return 0;
}
@@ -1215,7 +1187,9 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
struct hstate *h = hstate_inode(d_inode(dentry));
+ u64 id = huge_encode_dev(dentry->d_sb->s_dev);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_type = HUGETLBFS_MAGIC;
buf->f_bsize = huge_page_size(h);
if (sbinfo) {
@@ -1293,18 +1267,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
hugetlbfs_inc_free_inodes(sbinfo);
return NULL;
}
-
- /*
- * Any time after allocation, hugetlbfs_destroy_inode can be called
- * for the inode. mpol_free_shared_policy is unconditionally called
- * as part of hugetlbfs_destroy_inode. So, initialize policy here
- * in case of a quick call to destroy.
- *
- * Note that the policy is initialized even if we are creating a
- * private inode. This simplifies hugetlbfs_destroy_inode.
- */
- mpol_shared_policy_init(&p->policy, NULL);
-
return &p->vfs_inode;
}
@@ -1316,7 +1278,6 @@ static void hugetlbfs_free_inode(struct inode *inode)
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
- mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
}
static const struct address_space_operations hugetlbfs_aops = {
@@ -1324,7 +1285,7 @@ static const struct address_space_operations hugetlbfs_aops = {
.write_end = hugetlbfs_write_end,
.dirty_folio = noop_dirty_folio,
.migrate_folio = hugetlbfs_migrate_folio,
- .error_remove_page = hugetlbfs_error_remove_page,
+ .error_remove_folio = hugetlbfs_error_remove_folio,
};
@@ -1335,13 +1296,14 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
-const struct file_operations hugetlbfs_file_operations = {
+static const struct file_operations hugetlbfs_file_operations = {
.read_iter = hugetlbfs_read_iter,
.mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
.llseek = default_llseek,
.fallocate = hugetlbfs_fallocate,
+ .fop_flags = FOP_HUGE_PAGES,
};
static const struct inode_operations hugetlbfs_dir_inode_operations = {
@@ -1412,15 +1374,11 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
switch (opt) {
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
- goto bad_val;
+ ctx->uid = result.uid;
return 0;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
- goto bad_val;
+ ctx->gid = result.gid;
return 0;
case Opt_mode:
@@ -1602,6 +1560,7 @@ static struct file_system_type hugetlbfs_fs_type = {
.init_fs_context = hugetlbfs_init_fs_context,
.parameters = hugetlb_fs_parameters,
.kill_sb = kill_litter_super,
+ .fs_flags = FS_ALLOW_IDMAP,
};
static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
@@ -1655,7 +1614,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
}
file = ERR_PTR(-ENOSPC);
- inode = hugetlbfs_get_inode(mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0);
+ /* hugetlbfs_vfsmount[] mounts do not use idmapped mounts. */
+ inode = hugetlbfs_get_inode(mnt->mnt_sb, &nop_mnt_idmap, NULL,
+ S_IFREG | S_IRWXUGO, 0);
if (!inode)
goto out;
if (creat_flags == HUGETLB_SHMFS_INODE)