diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 30 | ||||
-rw-r--r-- | mm/mlock.c | 21 | ||||
-rw-r--r-- | mm/mmap.c | 24 | ||||
-rw-r--r-- | mm/nommu.c | 7 | ||||
-rw-r--r-- | mm/oom_kill.c | 16 | ||||
-rw-r--r-- | mm/page-writeback.c | 30 | ||||
-rw-r--r-- | mm/percpu.c | 6 | ||||
-rw-r--r-- | mm/percpu_up.c | 4 | ||||
-rw-r--r-- | mm/rmap.c | 19 | ||||
-rw-r--r-- | mm/shmem.c | 8 | ||||
-rw-r--r-- | mm/slab.c | 4 |
11 files changed, 112 insertions, 57 deletions
diff --git a/mm/memory.c b/mm/memory.c index b6e5fd23cc5a..6b2ab1051851 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2760,21 +2760,35 @@ out_release: } /* - * This is like a special single-page "expand_downwards()", - * except we must first make sure that 'address-PAGE_SIZE' + * This is like a special single-page "expand_{down|up}wards()", + * except we must first make sure that 'address{-|+}PAGE_SIZE' * doesn't hit another vma. - * - * The "find_vma()" will do the right thing even if we wrap */ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) { address &= PAGE_MASK; if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { - address -= PAGE_SIZE; - if (find_vma(vma->vm_mm, address) != vma) - return -ENOMEM; + struct vm_area_struct *prev = vma->vm_prev; + + /* + * Is there a mapping abutting this one below? + * + * That's only ok if it's the same stack mapping + * that has gotten split.. + */ + if (prev && prev->vm_end == address) + return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; + + expand_stack(vma, address - PAGE_SIZE); + } + if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { + struct vm_area_struct *next = vma->vm_next; + + /* As VM_GROWSDOWN but s/below/above/ */ + if (next && next->vm_start == address + PAGE_SIZE) + return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; - expand_stack(vma, address); + expand_upwards(vma, address + PAGE_SIZE); } return 0; } diff --git a/mm/mlock.c b/mm/mlock.c index 49e5e4cb8232..cbae7c5b9568 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,6 +135,19 @@ void munlock_vma_page(struct page *page) } } +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + +static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_stack_continue(vma->vm_prev, addr); +} + /** * __mlock_vma_pages_range() - mlock a range of pages in the vma. * @vma: target vma @@ -168,11 +181,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, gup_flags |= FOLL_WRITE; /* We don't try to access the guard page of a stack vma */ - if (vma->vm_flags & VM_GROWSDOWN) { - if (start == vma->vm_start) { - start += PAGE_SIZE; - nr_pages--; - } + if (stack_guard_page(vma, start)) { + addr += PAGE_SIZE; + nr_pages--; } while (nr_pages > 0) { diff --git a/mm/mmap.c b/mm/mmap.c index 31003338b978..6128dc8e5ede 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -388,17 +388,23 @@ static inline void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node *rb_parent) { + struct vm_area_struct *next; + + vma->vm_prev = prev; if (prev) { - vma->vm_next = prev->vm_next; + next = prev->vm_next; prev->vm_next = vma; } else { mm->mmap = vma; if (rb_parent) - vma->vm_next = rb_entry(rb_parent, + next = rb_entry(rb_parent, struct vm_area_struct, vm_rb); else - vma->vm_next = NULL; + next = NULL; } + vma->vm_next = next; + if (next) + next->vm_prev = vma; } void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, @@ -483,7 +489,11 @@ static inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev) { - prev->vm_next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next; + + prev->vm_next = next; + if (next) + next->vm_prev = prev; rb_erase(&vma->vm_rb, &mm->mm_rb); if (mm->mmap_cache == vma) mm->mmap_cache = prev; @@ -1706,9 +1716,6 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -#ifndef CONFIG_IA64 -static -#endif int expand_upwards(struct vm_area_struct *vma, unsigned long address) { int error; @@ -1915,6 +1922,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr; insertion_point = (prev ? &prev->vm_next : &mm->mmap); + vma->vm_prev = NULL; do { rb_erase(&vma->vm_rb, &mm->mm_rb); mm->map_count--; @@ -1922,6 +1930,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma = vma->vm_next; } while (vma && vma->vm_start < end); *insertion_point = vma; + if (vma) + vma->vm_prev = prev; tail_vma->vm_next = NULL; if (mm->unmap_area == arch_unmap_area) addr = prev ? prev->vm_end : mm->mmap_base; diff --git a/mm/nommu.c b/mm/nommu.c index efa9a380335e..88ff091eb07a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags) */ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) { - struct vm_area_struct *pvma, **pp; + struct vm_area_struct *pvma, **pp, *next; struct address_space *mapping; struct rb_node **p, *parent; @@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) break; } - vma->vm_next = *pp; + next = *pp; *pp = vma; + vma->vm_next = next; + if (next) + next->vm_prev = vma; } /* diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5014e50644d1..fc81cb22869e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -372,7 +372,7 @@ static void dump_tasks(const struct mem_cgroup *mem) } pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n", - task->pid, __task_cred(task)->uid, task->tgid, + task->pid, task_uid(task), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), task_cpu(task), task->signal->oom_adj, task->signal->oom_score_adj, task->comm); @@ -401,10 +401,9 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) { p = find_lock_task_mm(p); - if (!p) { - task_unlock(p); + if (!p) return 1; - } + pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(p), p->comm, K(p->mm->total_vm), K(get_mm_counter(p->mm, MM_ANONPAGES)), @@ -647,6 +646,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, unsigned long freed = 0; unsigned int points; enum oom_constraint constraint = CONSTRAINT_NONE; + int killed = 0; blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) @@ -684,7 +684,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, if (!oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, nodemask, "Out of memory (oom_kill_allocating_task)")) - return; + goto out; } retry: @@ -692,7 +692,7 @@ retry: constraint == CONSTRAINT_MEMORY_POLICY ? nodemask : NULL); if (PTR_ERR(p) == -1UL) - return; + goto out; /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { @@ -704,13 +704,15 @@ retry: if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, nodemask, "Out of memory")) goto retry; + killed = 1; +out: read_unlock(&tasklist_lock); /* * Give "p" a good chance of killing itself before we * retry to allocate memory unless "p" is current */ - if (!test_thread_flag(TIF_MEMDIE)) + if (killed && !test_thread_flag(TIF_MEMDIE)) schedule_timeout_uninterruptible(1); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7262aacea8a2..e3bccac1f025 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -836,7 +836,8 @@ void tag_pages_for_writeback(struct address_space *mapping, spin_unlock_irq(&mapping->tree_lock); WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); cond_resched(); - } while (tagged >= WRITEBACK_TAG_BATCH); + /* We check 'start' to handle wrapping when end == ~0UL */ + } while (tagged >= WRITEBACK_TAG_BATCH && start); } EXPORT_SYMBOL(tag_pages_for_writeback); @@ -984,22 +985,16 @@ continue_unlock: } } - if (wbc->nr_to_write > 0) { - if (--wbc->nr_to_write == 0 && - wbc->sync_mode == WB_SYNC_NONE) { - /* - * We stop writing back only if we are - * not doing integrity sync. In case of - * integrity sync we have to keep going - * because someone may be concurrently - * dirtying pages, and we might have - * synced a lot of newly appeared dirty - * pages, but have not synced all of the - * old dirty pages. - */ - done = 1; - break; - } + /* + * We stop writing back only if we are not doing + * integrity sync. In case of integrity sync we have to + * keep going until we have written all the pages + * we tagged for writeback prior to entering this loop. + */ + if (--wbc->nr_to_write <= 0 && + wbc->sync_mode == WB_SYNC_NONE) { + done = 1; + break; } } pagevec_release(&pvec); @@ -1131,6 +1126,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) task_io_account_write(PAGE_CACHE_SIZE); } } +EXPORT_SYMBOL(account_page_dirtied); /* * For address_spaces which do not use buffers. Just tag the page as dirty in diff --git a/mm/percpu.c b/mm/percpu.c index e61dc2cc5873..58c572b18b07 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -393,7 +393,9 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) goto out_unlock; old_size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, old_size); + old = chunk->map; + + memcpy(new, old, old_size); chunk->map_alloc = new_alloc; chunk->map = new; @@ -1162,7 +1164,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( } /* - * Don't accept if wastage is over 25%. The + * Don't accept if wastage is over 1/3. The * greater-than comparison ensures upa==1 always * passes the following check. */ diff --git a/mm/percpu_up.c b/mm/percpu_up.c index c4351c7f57d2..db884fae5721 100644 --- a/mm/percpu_up.c +++ b/mm/percpu_up.c @@ -14,13 +14,13 @@ void __percpu *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return kzalloc(size, GFP_KERNEL); + return (void __percpu __force *)kzalloc(size, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); void free_percpu(void __percpu *p) { - kfree(p); + kfree(this_cpu_ptr(p)); } EXPORT_SYMBOL_GPL(free_percpu); diff --git a/mm/rmap.c b/mm/rmap.c index 87b9e8ad4509..f6f0d2dda2ea 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -316,7 +316,7 @@ void __init anon_vma_init(void) */ struct anon_vma *page_lock_anon_vma(struct page *page) { - struct anon_vma *anon_vma; + struct anon_vma *anon_vma, *root_anon_vma; unsigned long anon_mapping; rcu_read_lock(); @@ -327,8 +327,21 @@ struct anon_vma *page_lock_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - anon_vma_lock(anon_vma); - return anon_vma; + root_anon_vma = ACCESS_ONCE(anon_vma->root); + spin_lock(&root_anon_vma->lock); + + /* + * If this page is still mapped, then its anon_vma cannot have been + * freed. But if it has been unmapped, we have no security against + * the anon_vma structure being freed and reused (for another anon_vma: + * SLAB_DESTROY_BY_RCU guarantees that - so the spin_lock above cannot + * corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting + * anon_vma->root before page_unlock_anon_vma() is called to unlock. + */ + if (page_mapped(page)) + return anon_vma; + + spin_unlock(&root_anon_vma->lock); out: rcu_read_unlock(); return NULL; diff --git a/mm/shmem.c b/mm/shmem.c index dfaa0f4e9789..080b09a57a8f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2325,7 +2325,10 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs) static void shmem_put_super(struct super_block *sb) { - kfree(sb->s_fs_info); + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + + percpu_counter_destroy(&sbinfo->used_blocks); + kfree(sbinfo); sb->s_fs_info = NULL; } @@ -2367,7 +2370,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - percpu_counter_init(&sbinfo->used_blocks, 0); + if (percpu_counter_init(&sbinfo->used_blocks, 0)) + goto failed; sbinfo->free_inodes = sbinfo->max_inodes; sb->s_maxbytes = SHMEM_MAX_BYTES; diff --git a/mm/slab.c b/mm/slab.c index 88435fcc8387..fcae9815d3b3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2330,8 +2330,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) { - cachep->obj_offset += PAGE_SIZE - size; + && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { + cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; } #endif |