summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c22
-rw-r--r--mm/filemap.c50
-rw-r--r--mm/huge_memory.c13
-rw-r--r--mm/hugetlb.c20
-rw-r--r--mm/memcontrol.c20
-rw-r--r--mm/memory.c58
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/slab.c6
-rw-r--r--mm/slab.h1
-rw-r--r--mm/slab_common.c13
-rw-r--r--mm/slub.c41
-rw-r--r--mm/truncate.c8
-rw-r--r--mm/util.c10
-rw-r--r--mm/vmacache.c8
-rw-r--r--mm/vmscan.c20
15 files changed, 183 insertions, 113 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 37f976287068..627dc2e4320f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone,
struct compact_control *cc)
{
struct page *page;
- unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
+ unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
int nr_freepages = cc->nr_freepages;
struct list_head *freelist = &cc->freepages;
/*
* Initialise the free scanner. The starting point is where we last
- * scanned from (or the end of the zone if starting). The low point
- * is the end of the pageblock the migration scanner is using.
+ * successfully isolated from, zone-cached value, or the end of the
+ * zone when isolating for the first time. We need this aligned to
+ * the pageblock boundary, because we do pfn -= pageblock_nr_pages
+ * in the for loop.
+ * The low boundary is the end of the pageblock the migration scanner
+ * is using.
*/
- pfn = cc->free_pfn;
+ pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
/*
@@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone,
for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
pfn -= pageblock_nr_pages) {
unsigned long isolated;
+ unsigned long end_pfn;
/*
* This can iterate a massively long zone without finding any
@@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone,
isolated = 0;
/*
- * As pfn may not start aligned, pfn+pageblock_nr_page
- * may cross a MAX_ORDER_NR_PAGES boundary and miss
- * a pfn_valid check. Ensure isolate_freepages_block()
- * only scans within a pageblock
+ * Take care when isolating in last pageblock of a zone which
+ * ends in the middle of a pageblock.
*/
- end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
- end_pfn = min(end_pfn, z_end_pfn);
+ end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
isolated = isolate_freepages_block(cc, pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
diff --git a/mm/filemap.c b/mm/filemap.c
index a82fbe4c9e8e..000a220e2a41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole);
* Looks up the page cache slot at @mapping & @offset. If there is a
* page cache page, it is returned with an increased refcount.
*
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
*
* Otherwise, %NULL is returned.
*/
@@ -928,9 +928,9 @@ repeat:
if (radix_tree_deref_retry(page))
goto repeat;
/*
- * Otherwise, shmem/tmpfs must be storing a swap entry
- * here as an exceptional entry: so return it without
- * attempting to raise page count.
+ * A shadow entry of a recently evicted page,
+ * or a swap entry from shmem/tmpfs. Return
+ * it without attempting to raise page count.
*/
goto out;
}
@@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page);
* page cache page, it is returned locked and with an increased
* refcount.
*
- * If the slot holds a shadow entry of a previously evicted page, it
- * is returned.
+ * If the slot holds a shadow entry of a previously evicted page, or a
+ * swap entry from shmem/tmpfs, it is returned.
*
* Otherwise, %NULL is returned.
*
@@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page);
* with ascending indexes. There may be holes in the indices due to
* not-present pages.
*
- * Any shadow entries of evicted pages are included in the returned
- * array.
+ * Any shadow entries of evicted pages, or swap entries from
+ * shmem/tmpfs, are included in the returned array.
*
* find_get_entries() returns the number of pages and shadow entries
* which were found.
@@ -1128,9 +1128,9 @@ repeat:
if (radix_tree_deref_retry(page))
goto restart;
/*
- * Otherwise, we must be storing a swap entry
- * here as an exceptional entry: so return it
- * without attempting to raise page count.
+ * A shadow entry of a recently evicted page,
+ * or a swap entry from shmem/tmpfs. Return
+ * it without attempting to raise page count.
*/
goto export;
}
@@ -1198,9 +1198,9 @@ repeat:
goto restart;
}
/*
- * Otherwise, shmem/tmpfs must be storing a swap entry
- * here as an exceptional entry: so skip over it -
- * we only reach this from invalidate_mapping_pages().
+ * A shadow entry of a recently evicted page,
+ * or a swap entry from shmem/tmpfs. Skip
+ * over it.
*/
continue;
}
@@ -1265,9 +1265,9 @@ repeat:
goto restart;
}
/*
- * Otherwise, shmem/tmpfs must be storing a swap entry
- * here as an exceptional entry: so stop looking for
- * contiguous pages.
+ * A shadow entry of a recently evicted page,
+ * or a swap entry from shmem/tmpfs. Stop
+ * looking for contiguous pages.
*/
break;
}
@@ -1341,10 +1341,17 @@ repeat:
goto restart;
}
/*
- * This function is never used on a shmem/tmpfs
- * mapping, so a swap entry won't be found here.
+ * A shadow entry of a recently evicted page.
+ *
+ * Those entries should never be tagged, but
+ * this tree walk is lockless and the tags are
+ * looked up in bulk, one radix tree node at a
+ * time, so there is a sizable window for page
+ * reclaim to evict a page we saw tagged.
+ *
+ * Skip over it.
*/
- BUG();
+ continue;
}
if (!page_cache_get_speculative(page))
@@ -2581,7 +2588,6 @@ EXPORT_SYMBOL(generic_perform_write);
* @iocb: IO state structure (file, offset, etc.)
* @iov: vector with data to write
* @nr_segs: number of segments in the vector
- * @ppos: position where to write
*
* This function does all the work needed for actually writing data to a
* file. It does all basic checks, removes SUID from the file, updates
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 64635f5278ff..b4b1feba6472 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1536,16 +1536,23 @@ pmd_t *page_check_address_pmd(struct page *page,
enum page_check_address_pmd_flag flag,
spinlock_t **ptl)
{
+ pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
if (address & ~HPAGE_PMD_MASK)
return NULL;
- pmd = mm_find_pmd(mm, address);
- if (!pmd)
+ pgd = pgd_offset(mm, address);
+ if (!pgd_present(*pgd))
return NULL;
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return NULL;
+ pmd = pmd_offset(pud, address);
+
*ptl = pmd_lock(mm, pmd);
- if (pmd_none(*pmd))
+ if (!pmd_present(*pmd))
goto unlock;
if (pmd_page(*pmd) != page)
goto unlock;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dd30f22b35e0..c82290b9c1fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1172,6 +1172,7 @@ static void return_unused_surplus_pages(struct hstate *h,
while (nr_pages--) {
if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
break;
+ cond_resched_lock(&hugetlb_lock);
}
}
@@ -1980,11 +1981,7 @@ static int __init hugetlb_init(void)
{
int i;
- /* Some platform decide whether they support huge pages at boot
- * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
- * there is no such support
- */
- if (HPAGE_SHIFT == 0)
+ if (!hugepages_supported())
return 0;
if (!size_to_hstate(default_hstate_size)) {
@@ -2111,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
unsigned long tmp;
int ret;
+ if (!hugepages_supported())
+ return -ENOTSUPP;
+
tmp = h->max_huge_pages;
if (write && h->order >= MAX_ORDER)
@@ -2164,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
unsigned long tmp;
int ret;
+ if (!hugepages_supported())
+ return -ENOTSUPP;
+
tmp = h->nr_overcommit_huge_pages;
if (write && h->order >= MAX_ORDER)
@@ -2189,6 +2192,8 @@ out:
void hugetlb_report_meminfo(struct seq_file *m)
{
struct hstate *h = &default_hstate;
+ if (!hugepages_supported())
+ return;
seq_printf(m,
"HugePages_Total: %5lu\n"
"HugePages_Free: %5lu\n"
@@ -2205,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
int hugetlb_report_node_meminfo(int nid, char *buf)
{
struct hstate *h = &default_hstate;
+ if (!hugepages_supported())
+ return 0;
return sprintf(buf,
"Node %d HugePages_Total: %5u\n"
"Node %d HugePages_Free: %5u\n"
@@ -2219,6 +2226,9 @@ void hugetlb_show_meminfo(void)
struct hstate *h;
int nid;
+ if (!hugepages_supported())
+ return;
+
for_each_node_state(nid, N_MEMORY)
for_each_hstate(h)
pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..c47dffdcb246 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
pgoff = pte_to_pgoff(ptent);
/* page is moved even if it's not RSS of this task(page-faulted). */
- page = find_get_page(mapping, pgoff);
-
#ifdef CONFIG_SWAP
/* shmem/tmpfs may report page out on swap: account for that too. */
- if (radix_tree_exceptional_entry(page)) {
- swp_entry_t swap = radix_to_swp_entry(page);
- if (do_swap_account)
- *entry = swap;
- page = find_get_page(swap_address_space(swap), swap.val);
- }
+ if (shmem_mapping(mapping)) {
+ page = find_get_entry(mapping, pgoff);
+ if (radix_tree_exceptional_entry(page)) {
+ swp_entry_t swp = radix_to_swp_entry(page);
+ if (do_swap_account)
+ *entry = swp;
+ page = find_get_page(swap_address_space(swp), swp.val);
+ }
+ } else
+ page = find_get_page(mapping, pgoff);
+#else
+ page = find_get_page(mapping, pgoff);
#endif
return page;
}
diff --git a/mm/memory.c b/mm/memory.c
index d0f0bef3be48..037b812a9531 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -232,17 +232,18 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
#endif
}
-void tlb_flush_mmu(struct mmu_gather *tlb)
+static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
- struct mmu_gather_batch *batch;
-
- if (!tlb->need_flush)
- return;
tlb->need_flush = 0;
tlb_flush(tlb);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
+}
+
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+ struct mmu_gather_batch *batch;
for (batch = &tlb->local; batch; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
@@ -251,6 +252,14 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
tlb->active = &tlb->local;
}
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ if (!tlb->need_flush)
+ return;
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+
/* tlb_finish_mmu
* Called at the end of the shootdown operation to free up any resources
* that were required.
@@ -1127,8 +1136,10 @@ again:
if (PageAnon(page))
rss[MM_ANONPAGES]--;
else {
- if (pte_dirty(ptent))
+ if (pte_dirty(ptent)) {
+ force_flush = 1;
set_page_dirty(page);
+ }
if (pte_young(ptent) &&
likely(!(vma->vm_flags & VM_SEQ_READ)))
mark_page_accessed(page);
@@ -1137,9 +1148,10 @@ again:
page_remove_rmap(page);
if (unlikely(page_mapcount(page) < 0))
print_bad_pte(vma, addr, ptent, page);
- force_flush = !__tlb_remove_page(tlb, page);
- if (force_flush)
+ if (unlikely(!__tlb_remove_page(tlb, page))) {
+ force_flush = 1;
break;
+ }
continue;
}
/*
@@ -1174,18 +1186,11 @@ again:
add_mm_rss_vec(mm, rss);
arch_leave_lazy_mmu_mode();
- pte_unmap_unlock(start_pte, ptl);
- /*
- * mmu_gather ran out of room to batch pages, we break out of
- * the PTE lock to avoid doing the potential expensive TLB invalidate
- * and page-free while holding it.
- */
+ /* Do the actual TLB flush before dropping ptl */
if (force_flush) {
unsigned long old_end;
- force_flush = 0;
-
/*
* Flush the TLB just for the previous segment,
* then update the range to be the remaining
@@ -1193,11 +1198,21 @@ again:
*/
old_end = tlb->end;
tlb->end = addr;
-
- tlb_flush_mmu(tlb);
-
+ tlb_flush_mmu_tlbonly(tlb);
tlb->start = addr;
tlb->end = old_end;
+ }
+ pte_unmap_unlock(start_pte, ptl);
+
+ /*
+ * If we forced a TLB flush (either due to running out of
+ * batch buffers or because we needed to flush dirty TLB
+ * entries before releasing the ptl), free the batched
+ * memory too. Restart if we didn't do everything.
+ */
+ if (force_flush) {
+ force_flush = 0;
+ tlb_flush_mmu_free(tlb);
if (addr != end)
goto again;
@@ -1955,12 +1970,17 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags)
{
struct vm_area_struct *vma;
+ vm_flags_t vm_flags;
int ret;
vma = find_extend_vma(mm, address);
if (!vma || address < vma->vm_start)
return -EFAULT;
+ vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+ if (!(vm_flags & vma->vm_flags))
+ return -EFAULT;
+
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef413492a149..a4317da60532 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
* (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
* => fast response on large errors; small oscillation near setpoint
*/
-static inline long long pos_ratio_polynom(unsigned long setpoint,
+static long long pos_ratio_polynom(unsigned long setpoint,
unsigned long dirty,
unsigned long limit)
{
long long pos_ratio;
long x;
- x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+ x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
limit - setpoint + 1);
pos_ratio = x;
pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
x_intercept = bdi_setpoint + span;
if (bdi_dirty < x_intercept - span / 4) {
- pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+ pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
x_intercept - bdi_setpoint + 1);
} else
pos_ratio /= 4;
diff --git a/mm/slab.c b/mm/slab.c
index 388cb1ae6fbc..19d92181ce24 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -166,7 +166,7 @@ typedef unsigned char freelist_idx_t;
typedef unsigned short freelist_idx_t;
#endif
-#define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE)
+#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
/*
* true if a page was allocated from pfmemalloc reserves for network-based
@@ -2572,13 +2572,13 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
return freelist;
}
-static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx)
+static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
return ((freelist_idx_t *)page->freelist)[idx];
}
static inline void set_free_obj(struct page *page,
- unsigned char idx, freelist_idx_t val)
+ unsigned int idx, freelist_idx_t val)
{
((freelist_idx_t *)(page->freelist))[idx] = val;
}
diff --git a/mm/slab.h b/mm/slab.h
index 3045316b7c9d..6bd4c353704f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
int __kmem_cache_shutdown(struct kmem_cache *);
+void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file;
struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..102cc6fca3d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
}
#endif /* CONFIG_MEMCG_KMEM */
+void slab_kmem_cache_release(struct kmem_cache *s)
+{
+ kfree(s->name);
+ kmem_cache_free(kmem_cache, s);
+}
+
void kmem_cache_destroy(struct kmem_cache *s)
{
get_online_cpus();
@@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
rcu_barrier();
memcg_free_cache_params(s);
- kfree(s->name);
- kmem_cache_free(kmem_cache, s);
+#ifdef SLAB_SUPPORTS_SYSFS
+ sysfs_slab_remove(s);
+#else
+ slab_kmem_cache_release(s);
+#endif
goto out_put_cpus;
out_unlock:
diff --git a/mm/slub.c b/mm/slub.c
index 5e234f1f8853..2b1ce697fc4b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
-static void sysfs_slab_remove(struct kmem_cache *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
-static inline void sysfs_slab_remove(struct kmem_cache *s) { }
-
static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
#endif
@@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
int __kmem_cache_shutdown(struct kmem_cache *s)
{
- int rc = kmem_cache_close(s);
-
- if (!rc) {
- /*
- * Since slab_attr_store may take the slab_mutex, we should
- * release the lock while removing the sysfs entry in order to
- * avoid a deadlock. Because this is pretty much the last
- * operation we do and the lock will be released shortly after
- * that in slab_common.c, we could just move sysfs_slab_remove
- * to a later point in common code. We should do that when we
- * have a common sysfs framework for all allocators.
- */
- mutex_unlock(&slab_mutex);
- sysfs_slab_remove(s);
- mutex_lock(&slab_mutex);
- }
-
- return rc;
+ return kmem_cache_close(s);
}
/********************************************************************
@@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
#ifdef CONFIG_MEMCG_KMEM
int i;
char *buffer = NULL;
+ struct kmem_cache *root_cache;
- if (!is_root_cache(s))
+ if (is_root_cache(s))
return;
+ root_cache = s->memcg_params->root_cache;
+
/*
* This mean this cache had no attribute written. Therefore, no point
* in copying default values around
*/
- if (!s->max_attr_size)
+ if (!root_cache->max_attr_size)
return;
for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
@@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
*/
if (buffer)
buf = buffer;
- else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+ else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
buf = mbuf;
else {
buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
buf = buffer;
}
- attr->show(s->memcg_params->root_cache, buf);
+ attr->show(root_cache, buf);
attr->store(s, buf, strlen(buf));
}
@@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
#endif
}
+static void kmem_cache_release(struct kobject *k)
+{
+ slab_kmem_cache_release(to_slab(k));
+}
+
static const struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
@@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
static struct kobj_type slab_ktype = {
.sysfs_ops = &slab_sysfs_ops,
+ .release = kmem_cache_release,
};
static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -5252,7 +5241,7 @@ out_put_kobj:
goto out;
}
-static void sysfs_slab_remove(struct kmem_cache *s)
+void sysfs_slab_remove(struct kmem_cache *s)
{
if (slab_state < FULL)
/*
diff --git a/mm/truncate.c b/mm/truncate.c
index e5cc39ab0751..6a78c814bebf 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
unsigned long count = 0;
int i;
- /*
- * Note: this function may get called on a shmem/tmpfs mapping:
- * pagevec_lookup() might then return 0 prematurely (because it
- * got a gangful of swap entries); but it's hardly worth worrying
- * about - it can rarely have anything to free from such a mapping
- * (most pages are dirty), and already skips over any difficulties.
- */
-
pagevec_init(&pvec, 0);
while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
diff --git a/mm/util.c b/mm/util.c
index f380af7ea779..d5ea733c5082 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -10,6 +10,7 @@
#include <linux/swapops.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
@@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
+void kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+EXPORT_SYMBOL(kvfree);
+
struct address_space *page_mapping(struct page *page)
{
struct address_space *mapping = page->mapping;
diff --git a/mm/vmacache.c b/mm/vmacache.c
index d4224b397c0e..1037a3bab505 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -81,10 +81,12 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
for (i = 0; i < VMACACHE_SIZE; i++) {
struct vm_area_struct *vma = current->vmacache[i];
- if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
- BUG_ON(vma->vm_mm != mm);
+ if (!vma)
+ continue;
+ if (WARN_ON_ONCE(vma->vm_mm != mm))
+ break;
+ if (vma->vm_start <= addr && vma->vm_end > addr)
return vma;
- }
}
return NULL;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9b6497eda806..32c661d66a45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1158,7 +1158,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
TTU_UNMAP|TTU_IGNORE_ACCESS,
&dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
list_splice(&clean_pages, page_list);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
+ mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
return ret;
}
@@ -1916,6 +1916,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
get_lru_size(lruvec, LRU_INACTIVE_FILE);
/*
+ * Prevent the reclaimer from falling into the cache trap: as
+ * cache pages start out inactive, every cache fault will tip
+ * the scan balance towards the file LRU. And as the file LRU
+ * shrinks, so does the window for rotation from references.
+ * This means we have a runaway feedback loop where a tiny
+ * thrashing file LRU becomes infinitely more attractive than
+ * anon pages. Try to detect this based on file LRU size.
+ */
+ if (global_reclaim(sc)) {
+ unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+
+ if (unlikely(file + free <= high_wmark_pages(zone))) {
+ scan_balance = SCAN_ANON;
+ goto out;
+ }
+ }
+
+ /*
* There is enough inactive page cache, do not reclaim
* anything from the anonymous working set right now.
*/