diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-27 05:33:41 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-27 05:33:41 +0300 |
commit | 345671ea0f9258f410eb057b9ced9cefbbe5dc78 (patch) | |
tree | fe97ba3d27679789e6aa34e39b002ee64ce25412 /kernel/fork.c | |
parent | 4904008165c8a1c48602b8316139691b8c735e6e (diff) | |
parent | 22146c3ce98962436e401f7b7016a6f664c9ffb5 (diff) | |
download | linux-345671ea0f9258f410eb057b9ced9cefbbe5dc78.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- a few misc things
- ocfs2 updates
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (132 commits)
hugetlbfs: dirty pages as they are added to pagecache
mm: export add_swap_extent()
mm: split SWP_FILE into SWP_ACTIVATED and SWP_FS
tools/testing/selftests/vm/map_fixed_noreplace.c: add test for MAP_FIXED_NOREPLACE
mm: thp: relocate flush_cache_range() in migrate_misplaced_transhuge_page()
mm: thp: fix mmu_notifier in migrate_misplaced_transhuge_page()
mm: thp: fix MADV_DONTNEED vs migrate_misplaced_transhuge_page race condition
mm/kasan/quarantine.c: make quarantine_lock a raw_spinlock_t
mm/gup: cache dev_pagemap while pinning pages
Revert "x86/e820: put !E820_TYPE_RAM regions into memblock.reserved"
mm: return zero_resv_unavail optimization
mm: zero remaining unavailable struct pages
tools/testing/selftests/vm/gup_benchmark.c: add MAP_HUGETLB option
tools/testing/selftests/vm/gup_benchmark.c: add MAP_SHARED option
tools/testing/selftests/vm/gup_benchmark.c: allow user specified file
tools/testing/selftests/vm/gup_benchmark.c: fix 'write' flag usage
mm/gup_benchmark.c: add additional pinning methods
mm/gup_benchmark.c: time put_page()
mm: don't raise MEMCG_OOM event due to failed high-order allocation
mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock
...
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 59 |
1 files changed, 53 insertions, 6 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index f0b58479534f..8f82a3bdcb8f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -223,9 +223,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) return s->addr; } + /* + * Allocated stacks are cached and later reused by new threads, + * so memcg accounting is performed manually on assigning/releasing + * stacks to tasks. Drop __GFP_ACCOUNT. + */ stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, + THREADINFO_GFP & ~__GFP_ACCOUNT, PAGE_KERNEL, 0, node, __builtin_return_address(0)); @@ -248,9 +253,19 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) static inline void free_thread_stack(struct task_struct *tsk) { #ifdef CONFIG_VMAP_STACK - if (task_stack_vm_area(tsk)) { + struct vm_struct *vm = task_stack_vm_area(tsk); + + if (vm) { int i; + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { + mod_memcg_page_state(vm->pages[i], + MEMCG_KERNEL_STACK_KB, + -(int)(PAGE_SIZE / 1024)); + + memcg_kmem_uncharge(vm->pages[i], 0); + } + for (i = 0; i < NR_CACHED_STACKS; i++) { if (this_cpu_cmpxchg(cached_stacks[i], NULL, tsk->stack_vm_area) != NULL) @@ -351,10 +366,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account) NR_KERNEL_STACK_KB, PAGE_SIZE / 1024 * account); } - - /* All stack pages belong to the same memcg. */ - mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); } else { /* * All stack pages are in the same zone and belong to the @@ -370,6 +381,35 @@ static void account_kernel_stack(struct task_struct *tsk, int account) } } +static int memcg_charge_kernel_stack(struct task_struct *tsk) +{ +#ifdef CONFIG_VMAP_STACK + struct vm_struct *vm = task_stack_vm_area(tsk); + int ret; + + if (vm) { + int i; + + for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { + /* + * If memcg_kmem_charge() fails, page->mem_cgroup + * pointer is NULL, and both memcg_kmem_uncharge() + * and mod_memcg_page_state() in free_thread_stack() + * will ignore this page. So it's safe. + */ + ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0); + if (ret) + return ret; + + mod_memcg_page_state(vm->pages[i], + MEMCG_KERNEL_STACK_KB, + PAGE_SIZE / 1024); + } + } +#endif + return 0; +} + static void release_task_stack(struct task_struct *tsk) { if (WARN_ON(tsk->state != TASK_DEAD)) @@ -807,6 +847,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!stack) goto free_tsk; + if (memcg_charge_kernel_stack(tsk)) + goto free_stack; + stack_vm_area = task_stack_vm_area(tsk); err = arch_dup_task_struct(tsk, orig); @@ -1779,6 +1822,10 @@ static __latent_entropy struct task_struct *copy_process( p->default_timer_slack_ns = current->timer_slack_ns; +#ifdef CONFIG_PSI + p->psi_flags = 0; +#endif + task_io_accounting_init(&p->ioac); acct_clear_integrals(p); |