summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/madvise.c5
-rw-r--r--mm/memory.c13
-rw-r--r--mm/mempolicy.c3
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mmap.c30
-rw-r--r--mm/rmap.c21
-rw-r--r--mm/vmscan.c9
8 files changed, 70 insertions, 15 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index df6bae3a5a2c..14d9e53b1ec2 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -713,7 +713,7 @@ static bool vma_ksm_compatible(struct vm_area_struct *vma)
{
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE | VM_PFNMAP |
VM_IO | VM_DONTEXPAND | VM_HUGETLB |
- VM_MIXEDMAP))
+ VM_MIXEDMAP| VM_DROPPABLE))
return false; /* just ignore the advice */
if (vma_is_dax(vma))
diff --git a/mm/madvise.c b/mm/madvise.c
index 96c026fe0c99..89089d84f8df 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1068,13 +1068,16 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
new_flags |= VM_WIPEONFORK;
break;
case MADV_KEEPONFORK:
+ if (vma->vm_flags & VM_DROPPABLE)
+ return -EINVAL;
new_flags &= ~VM_WIPEONFORK;
break;
case MADV_DONTDUMP:
new_flags |= VM_DONTDUMP;
break;
case MADV_DODUMP:
- if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL)
+ if ((!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) ||
+ (vma->vm_flags & VM_DROPPABLE))
return -EINVAL;
new_flags &= ~VM_DONTDUMP;
break;
diff --git a/mm/memory.c b/mm/memory.c
index 4bcd79619574..1ff7b6f51ec1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5801,6 +5801,7 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/* If the fault handler drops the mmap_lock, vma may be freed */
struct mm_struct *mm = vma->vm_mm;
vm_fault_t ret;
+ bool is_droppable;
__set_current_state(TASK_RUNNING);
@@ -5815,6 +5816,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
+ is_droppable = !!(vma->vm_flags & VM_DROPPABLE);
+
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
@@ -5829,8 +5832,18 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
else
ret = __handle_mm_fault(vma, address, flags);
+ /*
+ * Warning: It is no longer safe to dereference vma-> after this point,
+ * because mmap_lock might have been dropped by __handle_mm_fault(), so
+ * vma might be destroyed from underneath us.
+ */
+
lru_gen_exit_fault();
+ /* If the mapping is droppable, then errors due to OOM aren't fatal. */
+ if (is_droppable)
+ ret &= ~VM_FAULT_OOM;
+
if (flags & FAULT_FLAG_USER) {
mem_cgroup_exit_user_fault();
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 327a19b0883d..b858e22b259d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2305,6 +2305,9 @@ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct
pgoff_t ilx;
struct folio *folio;
+ if (vma->vm_flags & VM_DROPPABLE)
+ gfp |= __GFP_NOWARN;
+
pol = get_vma_policy(vma, addr, order, &ilx);
folio = folio_alloc_mpol_noprof(gfp, order, pol, ilx, numa_node_id());
mpol_cond_put(pol);
diff --git a/mm/mlock.c b/mm/mlock.c
index 52d6e401ad67..e3e3dc2b2956 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -474,7 +474,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
- vma_is_dax(vma) || vma_is_secretmem(vma))
+ vma_is_dax(vma) || vma_is_secretmem(vma) || (oldflags & VM_DROPPABLE))
/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index e42d89f98071..d0dfc85b209b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1410,6 +1410,36 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
pgoff = 0;
vm_flags |= VM_SHARED | VM_MAYSHARE;
break;
+ case MAP_DROPPABLE:
+ if (VM_DROPPABLE == VM_NONE)
+ return -ENOTSUPP;
+ /*
+ * A locked or stack area makes no sense to be droppable.
+ *
+ * Also, since droppable pages can just go away at any time
+ * it makes no sense to copy them on fork or dump them.
+ *
+ * And don't attempt to combine with hugetlb for now.
+ */
+ if (flags & (MAP_LOCKED | MAP_HUGETLB))
+ return -EINVAL;
+ if (vm_flags & (VM_GROWSDOWN | VM_GROWSUP))
+ return -EINVAL;
+
+ vm_flags |= VM_DROPPABLE;
+
+ /*
+ * If the pages can be dropped, then it doesn't make
+ * sense to reserve them.
+ */
+ vm_flags |= VM_NORESERVE;
+
+ /*
+ * Likewise, they're volatile enough that they
+ * shouldn't survive forks or coredumps.
+ */
+ vm_flags |= VM_WIPEONFORK | VM_DONTDUMP;
+ fallthrough;
case MAP_PRIVATE:
/*
* Set pgoff according to addr for anon_vma.
diff --git a/mm/rmap.c b/mm/rmap.c
index 8616308610b9..2490e727e2dc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1412,7 +1412,11 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
VM_BUG_ON_VMA(address < vma->vm_start ||
address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
- if (!folio_test_swapbacked(folio))
+ /*
+ * VM_DROPPABLE mappings don't swap; instead they're just dropped when
+ * under memory pressure.
+ */
+ if (!folio_test_swapbacked(folio) && !(vma->vm_flags & VM_DROPPABLE))
__folio_set_swapbacked(folio);
__folio_set_anon(folio, vma, address, exclusive);
@@ -1848,7 +1852,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* plus the rmap(s) (dropped by discard:).
*/
if (ref_count == 1 + map_count &&
- !folio_test_dirty(folio)) {
+ (!folio_test_dirty(folio) ||
+ /*
+ * Unlike MADV_FREE mappings, VM_DROPPABLE
+ * ones can be dropped even if they've
+ * been dirtied.
+ */
+ (vma->vm_flags & VM_DROPPABLE))) {
dec_mm_counter(mm, MM_ANONPAGES);
goto discard;
}
@@ -1858,7 +1868,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
* discarded. Remap the page to page table.
*/
set_pte_at(mm, address, pvmw.pte, pteval);
- folio_set_swapbacked(folio);
+ /*
+ * Unlike MADV_FREE mappings, VM_DROPPABLE ones
+ * never get swap backed on failure to drop.
+ */
+ if (!(vma->vm_flags & VM_DROPPABLE))
+ folio_set_swapbacked(folio);
goto walk_abort;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 525d3ffa8451..cfa839284b92 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4301,15 +4301,6 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
return true;
}
- /* dirty lazyfree */
- if (type == LRU_GEN_FILE && folio_test_anon(folio) && folio_test_dirty(folio)) {
- success = lru_gen_del_folio(lruvec, folio, true);
- VM_WARN_ON_ONCE_FOLIO(!success, folio);
- folio_set_swapbacked(folio);
- lruvec_add_folio_tail(lruvec, folio);
- return true;
- }
-
/* promoted */
if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
list_move(&folio->lru, &lrugen->folios[gen][type][zone]);