Diffstat (limited to 'mm')
 -rw-r--r--  mm/hugetlb.c        | 19
 -rw-r--r--  mm/memory-failure.c | 12
 -rw-r--r--  mm/memory_hotplug.c |  1
 -rw-r--r--  mm/page_alloc.c     | 25
 -rw-r--r--  mm/shmem.c          | 14
 -rw-r--r--  mm/swap_state.c     |  7
 -rw-r--r--  mm/vmscan.c         | 30
 7 files changed, 59 insertions(+), 49 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dfc940d5221d..8ea35ba6699f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 	if (!rc) {
 		/*
 		 * This indicates there is an entry in the reserve map
-		 * added by alloc_huge_page. We know it was added
+		 * not added by alloc_huge_page. We know it was added
 		 * before the alloc_huge_page call, otherwise
 		 * HPageRestoreReserve would be set on the page.
 		 * Remove the entry so that a subsequent allocation
@@ -4660,7 +4660,9 @@ retry_avoidcopy:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(&range);
 out_release_all:
-	restore_reserve_on_error(h, vma, haddr, new_page);
+	/* No restore in case of successful pagetable update (Break COW) */
+	if (new_page != old_page)
+		restore_reserve_on_error(h, vma, haddr, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	pte_t new_pte;
 	spinlock_t *ptl;
 	unsigned long haddr = address & huge_page_mask(h);
-	bool new_page = false;
+	bool new_page, new_pagecache_page = false;
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		goto out;
 
 retry:
+	new_page = false;
 	page = find_lock_page(mapping, idx);
 	if (!page) {
 		/* Check for page in userfault range */
@@ -4842,6 +4845,7 @@ retry:
 				goto retry;
 			goto out;
 		}
+		new_pagecache_page = true;
 	} else {
 		lock_page(page);
 		if (unlikely(anon_vma_prepare(vma))) {
@@ -4926,7 +4930,9 @@ backout:
 	spin_unlock(ptl);
backout_unlocked:
 	unlock_page(page);
-	restore_reserve_on_error(h, vma, haddr, page);
+	/* restore reserve for newly allocated pages not in page cache */
+	if (new_page && !new_pagecache_page)
+		restore_reserve_on_error(h, vma, haddr, page);
 	put_page(page);
 	goto out;
 }
@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
+	bool new_pagecache_page = false;
 
 	if (is_continue) {
 		ret = -EFAULT;
@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		ret = huge_add_to_page_cache(page, mapping, idx);
 		if (ret)
 			goto out_release_nounlock;
+		new_pagecache_page = true;
 	}
 
 	ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5291,7 +5299,8 @@ out_release_unlock:
 	if (vm_shared || is_continue)
 		unlock_page(page);
out_release_nounlock:
-	restore_reserve_on_error(h, dst_vma, dst_addr, page);
+	if (!new_pagecache_page)
+		restore_reserve_on_error(h, dst_vma, dst_addr, page);
 	put_page(page);
 	goto out;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index eefd823deb67..470400cc7513 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page)
 	 * unexpected races caused by taking a page refcount.
 	 */
 	if (!HWPoisonHandlable(head))
-		return 0;
+		return -EBUSY;
 
 	if (PageTransHuge(head)) {
 		/*
@@ -1199,9 +1199,15 @@ try_again:
 			}
 			goto out;
 		} else if (ret == -EBUSY) {
-			/* We raced with freeing huge page to buddy, retry. */
-			if (pass++ < 3)
+			/*
+			 * We raced with (possibly temporary) unhandlable
+			 * page, retry.
+			 */
+			if (pass++ < 3) {
+				shake_page(p, 1);
 				goto try_again;
+			}
+			ret = -EIO;
 			goto out;
 		}
 	}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8cb75b26ea4f..86c3af79e874 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1731,6 +1731,7 @@ failed_removal_isolated:
 	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 	memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal_pcplists_disabled:
+	lru_cache_enable();
 	zone_pcp_enable(zone);
failed_removal:
 	pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 856b175c15a4..eeb3a9cb36bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3453,19 +3453,10 @@ void free_unref_page_list(struct list_head *list)
 		 * comment in free_unref_page.
 		 */
 		migratetype = get_pcppage_migratetype(page);
-		if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
-			if (unlikely(is_migrate_isolate(migratetype))) {
-				list_del(&page->lru);
-				free_one_page(page_zone(page), page, pfn, 0,
-					      migratetype, FPI_NONE);
-				continue;
-			}
-
-			/*
-			 * Non-isolated types over MIGRATE_PCPTYPES get added
-			 * to the MIGRATE_MOVABLE pcp list.
-			 */
-			set_pcppage_migratetype(page, MIGRATE_MOVABLE);
+		if (unlikely(is_migrate_isolate(migratetype))) {
+			list_del(&page->lru);
+			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			continue;
 		}
 
 		set_page_private(page, pfn);
@@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list)
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_private(page);
 		set_page_private(page, 0);
+
+		/*
+		 * Non-isolated types over MIGRATE_PCPTYPES get added
+		 * to the MIGRATE_MOVABLE pcp list.
+		 */
 		migratetype = get_pcppage_migratetype(page);
+		if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+			migratetype = MIGRATE_MOVABLE;
+
 		trace_mm_page_free_batched(page);
 		free_unref_page_commit(page, pfn, migratetype, 0);
diff --git a/mm/shmem.c b/mm/shmem.c
index 70d9ce294bb4..dacda7463d54 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1696,8 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
-	struct swap_info_struct *si;
-	struct page *page = NULL;
+	struct page *page;
 	swp_entry_t swap;
 	int error;
 
@@ -1705,12 +1704,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap = radix_to_swp_entry(*pagep);
 	*pagep = NULL;
 
-	/* Prevent swapoff from happening to us. */
-	si = get_swap_device(swap);
-	if (!si) {
-		error = EINVAL;
-		goto failed;
-	}
 	/* Look it up and read it in.. */
 	page = lookup_swap_cache(swap, NULL, 0);
 	if (!page) {
@@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 
 	swap_free(swap);
 	*pagep = page;
-	if (si)
-		put_swap_device(si);
 	return 0;
failed:
 	if (!shmem_confirm_swap(mapping, index, swap))
@@ -1784,9 +1775,6 @@ unlock:
 		put_page(page);
 	}
 
-	if (si)
-		put_swap_device(si);
-
 	return error;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c56aa9ac050d..bc7cee6b2ec5 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	if (!mask)
 		goto skip;
 
-	/* Test swap type to make sure the dereference is safe */
-	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
-		struct inode *inode = si->swap_file->f_mapping->host;
-		if (inode_read_congested(inode))
-			goto skip;
-	}
-
 	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4620df62f0ff..eeae2f6bc532 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -100,9 +100,12 @@ struct scan_control {
 	unsigned int may_swap:1;
 
 	/*
-	 * Cgroups are not reclaimed below their configured memory.low,
-	 * unless we threaten to OOM. If any cgroups are skipped due to
-	 * memory.low and nothing was reclaimed, go back for memory.low.
+	 * Cgroup memory below memory.low is protected as long as we
+	 * don't threaten to OOM. If any cgroup is reclaimed at
+	 * reduced force or passed over entirely due to its memory.low
+	 * setting (memcg_low_skipped), and nothing is reclaimed as a
+	 * result, then go back for one more cycle that reclaims the protected
+	 * memory (memcg_low_reclaim) to avert OOM.
 	 */
 	unsigned int memcg_low_reclaim:1;
 	unsigned int memcg_low_skipped:1;
@@ -2537,15 +2540,14 @@ out:
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
 		unsigned long lruvec_size;
+		unsigned long low, min;
 		unsigned long scan;
-		unsigned long protection;
 
 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-		protection = mem_cgroup_protection(sc->target_mem_cgroup,
-						   memcg,
-						   sc->memcg_low_reclaim);
+		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+				      &min, &low);
 
-		if (protection) {
+		if (min || low) {
 			/*
 			 * Scale a cgroup's reclaim pressure by proportioning
 			 * its current usage to its memory.low or memory.min
@@ -2576,6 +2578,15 @@ out:
 			 * hard protection.
 			 */
 			unsigned long cgroup_size = mem_cgroup_size(memcg);
+			unsigned long protection;
+
+			/* memory.low scaling, make sure we retry before OOM */
+			if (!sc->memcg_low_reclaim && low > min) {
+				protection = low;
+				sc->memcg_low_skipped = 1;
+			} else {
+				protection = min;
+			}
 
 			/* Avoid TOCTOU with earlier protection check */
 			cgroup_size = max(cgroup_size, protection);
@@ -4413,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.may_swap = 1,
 		.reclaim_idx = gfp_zone(gfp_mask),
 	};
+	unsigned long pflags;
 
 	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
 					   sc.gfp_mask);
 
 	cond_resched();
+	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(sc.gfp_mask);
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
@@ -4442,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
+	psi_memstall_leave(&pflags);
 
 	trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
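
Two of the hunks above carry non-obvious reasoning. For the mm/vmscan.c change, here is a minimal userspace C sketch of the memory.low/memory.min scaling it implements; effective_protection, scale_scan, and all of the numbers below are illustrative stand-ins, not kernel API:

#include <stdbool.h>
#include <stdio.h>

/*
 * Pick the protection to scale against, mirroring the new in-loop
 * logic: honor memory.low on the first pass and remember that
 * protected memory was skipped, so the caller can retry with
 * low_reclaim=true before declaring OOM; once retrying, fall back
 * to the hard memory.min floor.
 */
static unsigned long effective_protection(unsigned long min, unsigned long low,
					  bool low_reclaim, bool *low_skipped)
{
	if (!low_reclaim && low > min) {
		*low_skipped = true;
		return low;
	}
	return min;
}

/* Scan pressure proportional to how far usage exceeds the protection. */
static unsigned long scale_scan(unsigned long lruvec_size,
				unsigned long usage, unsigned long protection)
{
	/* Avoid TOCTOU: usage may lag behind the earlier protection check. */
	if (usage < protection)
		usage = protection;
	return lruvec_size - lruvec_size * protection / (usage + 1);
}

int main(void)
{
	bool skipped = false;
	unsigned long prot = effective_protection(100, 400, false, &skipped);

	printf("scan %lu pages (skipped=%d)\n",
	       scale_scan(1000, 500, prot), skipped);
	return 0;
}

With usage at 500 pages against a memory.low of 400, this prints "scan 202 pages (skipped=1)": roughly a fifth of the LRU gets scanned, and the skipped flag arms one more reclaim cycle with low_reclaim set before OOM is considered.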
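
For the mm/page_alloc.c change, a sketch of the restructured two-pass free path, assuming hypothetical stand-ins (fake_page, free_page_list, and the enum values are made up for illustration): isolated pages are caught in the prepare pass, while the fold of exotic migratetypes into MIGRATE_MOVABLE moves to the commit pass, so the page's stored migratetype is no longer rewritten up front.

#include <stdio.h>

/* Hypothetical stand-ins; the real values live in include/linux/mmzone.h. */
enum { MIGRATE_MOVABLE = 1, MIGRATE_PCPTYPES = 3, MIGRATE_ISOLATE = 5 };

struct fake_page {
	int migratetype;
	int freed_to_buddy;	/* models free_one_page() */
	int freed_to_pcp;	/* models free_unref_page_commit() */
};

static void free_page_list(struct fake_page *pages, int n)
{
	/*
	 * Pass 1 (prepare): isolated pages bypass the pcp lists and go
	 * straight back to the buddy allocator.
	 */
	for (int i = 0; i < n; i++)
		if (pages[i].migratetype == MIGRATE_ISOLATE)
			pages[i].freed_to_buddy = 1;

	/*
	 * Pass 2 (commit): everything else goes to a pcp list; types
	 * above MIGRATE_PCPTYPES ride the MOVABLE list, decided here
	 * rather than rewritten during pass 1.
	 */
	for (int i = 0; i < n; i++) {
		int mt = pages[i].migratetype;

		if (pages[i].freed_to_buddy)
			continue;
		if (mt >= MIGRATE_PCPTYPES)
			mt = MIGRATE_MOVABLE;
		pages[i].migratetype = mt;
		pages[i].freed_to_pcp = 1;
	}
}

int main(void)
{
	struct fake_page pages[] = {
		{ .migratetype = MIGRATE_MOVABLE },
		{ .migratetype = MIGRATE_ISOLATE },
		{ .migratetype = MIGRATE_PCPTYPES + 1 },	/* CMA-like */
	};

	free_page_list(pages, 3);
	for (int i = 0; i < 3; i++)
		printf("page %d: buddy=%d pcp=%d mt=%d\n", i,
		       pages[i].freed_to_buddy, pages[i].freed_to_pcp,
		       pages[i].migratetype);
	return 0;
}

The design point the sketch tries to capture: deferring the MOVABLE fold to commit time means the prepare pass only has to filter out isolated pages, matching the relocated comment in the hunk.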