diff options
Diffstat (limited to 'mm/madvise.c')
-rw-r--r-- | mm/madvise.c | 101 |
1 files changed, 89 insertions, 12 deletions
diff --git a/mm/madvise.c b/mm/madvise.c index 0e3828eae9f8..7a2abf0127ae 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -10,6 +10,7 @@ #include <linux/syscalls.h> #include <linux/mempolicy.h> #include <linux/page-isolation.h> +#include <linux/userfaultfd_k.h> #include <linux/hugetlb.h> #include <linux/falloc.h> #include <linux/sched.h> @@ -20,10 +21,13 @@ #include <linux/backing-dev.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> #include <asm/tlb.h> +#include "internal.h" + /* * Any behaviour which results in changes to the vma->vm_flags needs to * take mmap_sem for writing. Others, which simply traverse vmas, need @@ -89,14 +93,28 @@ static long madvise_behavior(struct vm_area_struct *vma, case MADV_MERGEABLE: case MADV_UNMERGEABLE: error = ksm_madvise(vma, start, end, behavior, &new_flags); - if (error) + if (error) { + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; goto out; + } break; case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: error = hugepage_madvise(vma, &new_flags, behavior); - if (error) + if (error) { + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; goto out; + } break; } @@ -117,15 +135,37 @@ static long madvise_behavior(struct vm_area_struct *vma, *prev = vma; if (start != vma->vm_start) { - error = split_vma(mm, vma, start, 1); - if (error) + if (unlikely(mm->map_count >= sysctl_max_map_count)) { + error = -ENOMEM; goto out; + } + error = __split_vma(mm, vma, start, 1); + if (error) { + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; + goto out; + } } if (end != vma->vm_end) { - error = split_vma(mm, vma, end, 0); - if (error) + if (unlikely(mm->map_count >= sysctl_max_map_count)) { + error = -ENOMEM; goto out; + } + error = __split_vma(mm, vma, end, 0); + if (error) { + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; + goto out; + } } success: @@ -133,10 +173,7 @@ success: * vm_flags is protected by the mmap_sem held in write mode. */ vma->vm_flags = new_flags; - out: - if (error == -ENOMEM) - error = -EAGAIN; return error; } @@ -473,10 +510,47 @@ static long madvise_dontneed(struct vm_area_struct *vma, unsigned long start, unsigned long end) { *prev = vma; - if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) + if (!can_madv_dontneed_vma(vma)) return -EINVAL; - zap_page_range(vma, start, end - start, NULL); + if (!userfaultfd_remove(vma, start, end)) { + *prev = NULL; /* mmap_sem has been dropped, prev is stale */ + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + if (!vma) + return -ENOMEM; + if (start < vma->vm_start) { + /* + * This "vma" under revalidation is the one + * with the lowest vma->vm_start where start + * is also < vma->vm_end. If start < + * vma->vm_start it means an hole materialized + * in the user address space within the + * virtual range passed to MADV_DONTNEED. + */ + return -ENOMEM; + } + if (!can_madv_dontneed_vma(vma)) + return -EINVAL; + if (end > vma->vm_end) { + /* + * Don't fail if end > vma->vm_end. If the old + * vma was splitted while the mmap_sem was + * released the effect of the concurrent + * operation may not cause MADV_DONTNEED to + * have an undefined result. There may be an + * adjacent next vma that we'll walk + * next. userfaultfd_remove() will generate an + * UFFD_EVENT_REMOVE repetition on the + * end-vma->vm_end range, but the manager can + * handle a repetition fine. + */ + end = vma->vm_end; + } + VM_WARN_ON(start >= end); + } + zap_page_range(vma, start, end - start); return 0; } @@ -516,7 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma, * mmap_sem. */ get_file(f); - up_read(¤t->mm->mmap_sem); + if (userfaultfd_remove(vma, start, end)) { + /* mmap_sem was not released by userfaultfd_remove() */ + up_read(¤t->mm->mmap_sem); + } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); |