diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 231 |
1 files changed, 204 insertions, 27 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c848ea97ab02..387b030c7f15 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -74,12 +74,23 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; unsigned long huge_zero_pfn __read_mostly = ~0UL; +unsigned long huge_anon_orders_always __read_mostly; +unsigned long huge_anon_orders_madvise __read_mostly; +unsigned long huge_anon_orders_inherit __read_mostly; + +unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, + unsigned long vm_flags, bool smaps, + bool in_pf, bool enforce_sysfs, + unsigned long orders) +{ + /* Check the intersection of requested and supported orders. */ + orders &= vma_is_anonymous(vma) ? + THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE; + if (!orders) + return 0; -bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, - bool smaps, bool in_pf, bool enforce_sysfs) -{ if (!vma->vm_mm) /* vdso */ - return false; + return 0; /* * Explicitly disabled through madvise or prctl, or some @@ -88,16 +99,16 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, * */ if ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) - return false; + return 0; /* * If the hardware/firmware marked hugepage support disabled. */ if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) - return false; + return 0; /* khugepaged doesn't collapse DAX vma, but page fault is fine. */ if (vma_is_dax(vma)) - return in_pf; + return in_pf ? orders : 0; /* * khugepaged special VMA and hugetlb VMA. @@ -105,17 +116,29 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, * VM_MIXEDMAP set. */ if (!in_pf && !smaps && (vm_flags & VM_NO_KHUGEPAGED)) - return false; + return 0; /* - * Check alignment for file vma and size for both file and anon vma. + * Check alignment for file vma and size for both file and anon vma by + * filtering out the unsuitable orders. * * Skip the check for page fault. Huge fault does the check in fault - * handlers. And this check is not suitable for huge PUD fault. + * handlers. */ - if (!in_pf && - !transhuge_vma_suitable(vma, (vma->vm_end - HPAGE_PMD_SIZE))) - return false; + if (!in_pf) { + int order = highest_order(orders); + unsigned long addr; + + while (orders) { + addr = vma->vm_end - (PAGE_SIZE << order); + if (thp_vma_suitable_order(vma, addr, order)) + break; + order = next_order(&orders, order); + } + + if (!orders) + return 0; + } /* * Enabled via shmem mount options or sysfs settings. @@ -124,29 +147,33 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, */ if (!in_pf && shmem_file(vma->vm_file)) return shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff, - !enforce_sysfs, vma->vm_mm, vm_flags); - - /* Enforce sysfs THP requirements as necessary */ - if (enforce_sysfs && - (!hugepage_flags_enabled() || (!(vm_flags & VM_HUGEPAGE) && - !hugepage_flags_always()))) - return false; + !enforce_sysfs, vma->vm_mm, vm_flags) + ? orders : 0; if (!vma_is_anonymous(vma)) { /* + * Enforce sysfs THP requirements as necessary. Anonymous vmas + * were already handled in thp_vma_allowable_orders(). + */ + if (enforce_sysfs && + (!hugepage_global_enabled() || (!(vm_flags & VM_HUGEPAGE) && + !hugepage_global_always()))) + return 0; + + /* * Trust that ->huge_fault() handlers know what they are doing * in fault path. */ if (((in_pf || smaps)) && vma->vm_ops->huge_fault) - return true; + return orders; /* Only regular file is valid in collapse path */ if (((!in_pf || smaps)) && file_thp_enabled(vma)) - return true; - return false; + return orders; + return 0; } if (vma_is_temporary_stack(vma)) - return false; + return 0; /* * THPeligible bit of smaps should show 1 for proper VMAs even @@ -156,9 +183,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, * the first page fault. */ if (!vma->anon_vma) - return (smaps || in_pf); + return (smaps || in_pf) ? orders : 0; - return true; + return orders; } static bool get_huge_zero_page(void) @@ -412,9 +439,136 @@ static const struct attribute_group hugepage_attr_group = { .attrs = hugepage_attr, }; +static void hugepage_exit_sysfs(struct kobject *hugepage_kobj); +static void thpsize_release(struct kobject *kobj); +static DEFINE_SPINLOCK(huge_anon_orders_lock); +static LIST_HEAD(thpsize_list); + +struct thpsize { + struct kobject kobj; + struct list_head node; + int order; +}; + +#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj) + +static ssize_t thpsize_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int order = to_thpsize(kobj)->order; + const char *output; + + if (test_bit(order, &huge_anon_orders_always)) + output = "[always] inherit madvise never"; + else if (test_bit(order, &huge_anon_orders_inherit)) + output = "always [inherit] madvise never"; + else if (test_bit(order, &huge_anon_orders_madvise)) + output = "always inherit [madvise] never"; + else + output = "always inherit madvise [never]"; + + return sysfs_emit(buf, "%s\n", output); +} + +static ssize_t thpsize_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int order = to_thpsize(kobj)->order; + ssize_t ret = count; + + if (sysfs_streq(buf, "always")) { + spin_lock(&huge_anon_orders_lock); + clear_bit(order, &huge_anon_orders_inherit); + clear_bit(order, &huge_anon_orders_madvise); + set_bit(order, &huge_anon_orders_always); + spin_unlock(&huge_anon_orders_lock); + } else if (sysfs_streq(buf, "inherit")) { + spin_lock(&huge_anon_orders_lock); + clear_bit(order, &huge_anon_orders_always); + clear_bit(order, &huge_anon_orders_madvise); + set_bit(order, &huge_anon_orders_inherit); + spin_unlock(&huge_anon_orders_lock); + } else if (sysfs_streq(buf, "madvise")) { + spin_lock(&huge_anon_orders_lock); + clear_bit(order, &huge_anon_orders_always); + clear_bit(order, &huge_anon_orders_inherit); + set_bit(order, &huge_anon_orders_madvise); + spin_unlock(&huge_anon_orders_lock); + } else if (sysfs_streq(buf, "never")) { + spin_lock(&huge_anon_orders_lock); + clear_bit(order, &huge_anon_orders_always); + clear_bit(order, &huge_anon_orders_inherit); + clear_bit(order, &huge_anon_orders_madvise); + spin_unlock(&huge_anon_orders_lock); + } else + ret = -EINVAL; + + return ret; +} + +static struct kobj_attribute thpsize_enabled_attr = + __ATTR(enabled, 0644, thpsize_enabled_show, thpsize_enabled_store); + +static struct attribute *thpsize_attrs[] = { + &thpsize_enabled_attr.attr, + NULL, +}; + +static const struct attribute_group thpsize_attr_group = { + .attrs = thpsize_attrs, +}; + +static const struct kobj_type thpsize_ktype = { + .release = &thpsize_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static struct thpsize *thpsize_create(int order, struct kobject *parent) +{ + unsigned long size = (PAGE_SIZE << order) / SZ_1K; + struct thpsize *thpsize; + int ret; + + thpsize = kzalloc(sizeof(*thpsize), GFP_KERNEL); + if (!thpsize) + return ERR_PTR(-ENOMEM); + + ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent, + "hugepages-%lukB", size); + if (ret) { + kfree(thpsize); + return ERR_PTR(ret); + } + + ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group); + if (ret) { + kobject_put(&thpsize->kobj); + return ERR_PTR(ret); + } + + thpsize->order = order; + return thpsize; +} + +static void thpsize_release(struct kobject *kobj) +{ + kfree(to_thpsize(kobj)); +} + static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) { int err; + struct thpsize *thpsize; + unsigned long orders; + int order; + + /* + * Default to setting PMD-sized THP to inherit the global setting and + * disable all other sizes. powerpc's PMD_ORDER isn't a compile-time + * constant so we have to do this here. + */ + huge_anon_orders_inherit = BIT(PMD_ORDER); *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); if (unlikely(!*hugepage_kobj)) { @@ -434,8 +588,24 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) goto remove_hp_group; } + orders = THP_ORDERS_ALL_ANON; + order = highest_order(orders); + while (orders) { + thpsize = thpsize_create(order, *hugepage_kobj); + if (IS_ERR(thpsize)) { + pr_err("failed to create thpsize for order %d\n", order); + err = PTR_ERR(thpsize); + goto remove_all; + } + list_add(&thpsize->node, &thpsize_list); + order = next_order(&orders, order); + } + return 0; +remove_all: + hugepage_exit_sysfs(*hugepage_kobj); + return err; remove_hp_group: sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group); delete_obj: @@ -445,6 +615,13 @@ delete_obj: static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj) { + struct thpsize *thpsize, *tmp; + + list_for_each_entry_safe(thpsize, tmp, &thpsize_list, node) { + list_del(&thpsize->node); + kobject_put(&thpsize->kobj); + } + sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group); sysfs_remove_group(hugepage_kobj, &hugepage_attr_group); kobject_put(hugepage_kobj); @@ -811,7 +988,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) struct folio *folio; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; - if (!transhuge_vma_suitable(vma, haddr)) + if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER)) return VM_FAULT_FALLBACK; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; |