From 318b275fbca1ab9ec0862de71420e0e92c3d1aa7 Mon Sep 17 00:00:00 2001
From: Gleb Natapov
Date: Tue, 22 Mar 2011 16:30:51 -0700
Subject: mm: allow GUP to fail instead of waiting on a page

A GUP user may want to acquire a reference to a page if it is already in
memory, but not if I/O is needed to bring it in.  For example, KVM may tell
a vcpu to schedule another guest process if the current one is trying to
access a swapped-out page.  Meanwhile, the page will be swapped in and the
guest process that depends on it will be able to run again.

This patch adds the FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and
FOLL_NOWAIT follow_page flags.  FAULT_FLAG_RETRY_NOWAIT, when used in
conjunction with FAULT_FLAG_ALLOW_RETRY, indicates to handle_mm_fault that it
shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY instead.

[akpm@linux-foundation.org: improve FOLL_NOWAIT comment]
Signed-off-by: Gleb Natapov
Cc: Linus Torvalds
Cc: Hugh Dickins
Acked-by: Rik van Riel
Cc: Michel Lespinasse
Cc: Avi Kivity
Cc: Marcelo Tosatti
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index e48945ab362b..615be5127ce1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1569,6 +1569,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			fault_flags |= FAULT_FLAG_WRITE;
 		if (nonblocking)
 			fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+		if (foll_flags & FOLL_NOWAIT)
+			fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);

 		ret = handle_mm_fault(mm, vma, start, fault_flags);

@@ -1595,7 +1597,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			tsk->min_flt++;

 		if (ret & VM_FAULT_RETRY) {
-			*nonblocking = 0;
+			if (nonblocking)
+				*nonblocking = 0;
 			return i;
 		}
--
cgit v1.2.3
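A minimal sketch of how a caller might use the new flag (the helper name is
hypothetical and not part of this patch; it assumes the __get_user_pages()
signature of this kernel and that the caller already holds mmap_sem for read):

	/* Try to pin one page, but give up instead of sleeping for I/O. */
	static int get_user_page_try_nowait(struct task_struct *tsk,
					    struct mm_struct *mm,
					    unsigned long addr, int write,
					    struct page **page)
	{
		unsigned int flags = FOLL_GET | FOLL_TOUCH | FOLL_NOWAIT;

		if (write)
			flags |= FOLL_WRITE;

		/* 1 if the page was resident and pinned; 0 or -errno otherwise */
		return __get_user_pages(tsk, mm, addr, 1, flags, page, NULL, NULL);
	}

Passing a NULL "nonblocking" pointer here is exactly what the new
"if (nonblocking)" check in the VM_FAULT_RETRY path makes safe.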
Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 6 +++--- arch/x86/vdso/vdso32-setup.c | 11 ++++++----- fs/binfmt_elf.c | 2 +- fs/proc/task_mmu.c | 8 +++++--- include/linux/mm.h | 2 +- mm/memory.c | 4 ++-- mm/mlock.c | 4 ++-- 10 files changed, 23 insertions(+), 20 deletions(-) (limited to 'mm/memory.c') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729abc..6169f1756930 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -830,7 +830,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedbd..d19f30504c63 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -347,7 +347,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 242117cbad67..3f9b6f41813a 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -94,7 +94,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *task) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0aa34669ed3f..dd4809b58441 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -861,10 +861,10 @@ static struct vm_area_struct gate_vma = { .vm_flags = VM_READ | VM_EXEC }; -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) + if (!mm || mm->context.ia32_compat) return NULL; #endif return &gate_vma; @@ -872,7 +872,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task); + struct vm_area_struct *vma = get_gate_vma(task->mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 36df991985b2..1f651f6bdf61 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -417,11 +417,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - struct mm_struct *mm = tsk->mm; - - /* Check to see if this task was created in compat vdso mode */ + /* + * Check to see if the corresponding task was created in compat vdso + * mode. 
+ */ if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) return &gate_vma; return NULL; @@ -429,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task); + const struct vm_area_struct *vma = get_gate_vma(task->mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d5b640ba6cb1..bbabdcce1179 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); - gate_vma = get_gate_vma(current); + gate_vma = get_gate_vma(current->mm); if (gate_vma != NULL) segs++; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8fed0f88fbf7..e73314afc535 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -126,7 +126,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) return mm; down_read(&mm->mmap_sem); - tail_vma = get_gate_vma(priv->task); + tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; /* Start with last addr hint */ @@ -277,7 +277,8 @@ static int show_map(struct seq_file *m, void *v) show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -436,7 +437,8 @@ static int show_smap(struct seq_file *m, void *v) (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 581703d86fbd..18b4a6358ab4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1578,7 +1578,7 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif -extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); +extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA int in_gate_area_no_task(unsigned long addr); int in_gate_area(struct task_struct *task, unsigned long addr); diff --git a/mm/memory.c b/mm/memory.c index e48945ab362b..b6dc37097433 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1488,7 +1488,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vma = find_extend_vma(mm, start); if (!vma && in_gate_area(tsk, start)) { unsigned long pg = start & PAGE_MASK; - struct vm_area_struct *gate_vma = get_gate_vma(tsk); + struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -3496,7 +3496,7 @@ static int __init gate_vma_init(void) __initcall(gate_vma_init); #endif -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef AT_SYSINFO_EHDR return &gate_vma; diff --git a/mm/mlock.c b/mm/mlock.c index c3924c7f00be..2689a08c79af 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) { + vma == get_gate_vma(current->mm))) { __mlock_vma_pages_range(vma, start, end, NULL); @@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct 
vm_area_struct **prev, int lock = newflags & VM_LOCKED; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current)) + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) goto out; /* don't set VM_LOCKED, don't count */ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); -- cgit v1.2.3 From 83b964bbf82eb13a8f31bb49ca420787fe01f7a6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:16 -0400 Subject: mm: arch: make in_gate_area take an mm_struct instead of a task_struct Morally, the question of whether an address lies in a gate vma should be asked with respect to an mm, not a particular task. Moreover, dropping the dependency on task_struct will help make existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/vdso/vdso32-setup.c | 4 ++-- include/linux/mm.h | 4 ++-- mm/memory.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'mm/memory.c') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6169f1756930..467aa9ecbf9d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -825,7 +825,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d19f30504c63..9006e966ef00 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -342,7 +342,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 3f9b6f41813a..62c36a8961d3 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -99,7 +99,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long address) +int in_gate_area(struct mm_struct *mm, unsigned long address) { return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dd4809b58441..43c441622c89 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -870,9 +870,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return &gate_vma; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task->mm); + struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 1f651f6bdf61..f849bb29fda1 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -428,9 +428,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task->mm); + const struct vm_area_struct *vma = get_gate_vma(mm); return vma && addr >= vma->vm_start 
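A hedged sketch of how a caller uses the reworked, mm-based gate-area
interface (the helper is hypothetical and not from these patches): a caller
that only has a task can take a reference on its mm and ask the question
per-mm.

	static int task_addr_in_gate_area(struct task_struct *task,
					  unsigned long addr)
	{
		struct mm_struct *mm = get_task_mm(task);
		int ret = 0;

		if (mm) {
			ret = in_gate_area(mm, addr);
			mmput(mm);
		}
		return ret;
	}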
From cae5d39032acf26c265f6b1dc73d7ce6ff4bc387 Mon Sep 17 00:00:00 2001
From: Stephen Wilson
Date: Sun, 13 Mar 2011 15:49:17 -0400
Subject: mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm

Now that gate vma's are referenced with respect to a particular mm and not a
particular task it only makes sense to propagate the change to this predicate
as well.

Signed-off-by: Stephen Wilson
Reviewed-by: Michel Lespinasse
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Signed-off-by: Al Viro
---
 arch/powerpc/kernel/vdso.c         | 2 +-
 arch/s390/kernel/vdso.c            | 2 +-
 arch/sh/kernel/vsyscall/vsyscall.c | 2 +-
 arch/x86/mm/init_64.c              | 8 ++++----
 arch/x86/vdso/vdso32-setup.c       | 2 +-
 include/linux/mm.h                 | 6 +++---
 kernel/kallsyms.c                  | 4 ++--
 mm/memory.c                        | 2 +-
 mm/nommu.c                         | 2 +-
 9 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'mm/memory.c')

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 467aa9ecbf9d..142ab1008c3b 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -820,7 +820,7 @@ static int __init vdso_init(void)
 }
 arch_initcall(vdso_init);

-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 9006e966ef00..d73630b4fe1d 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -337,7 +337,7 @@ static int __init vdso_init(void)
 }
 arch_initcall(vdso_init);

-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 62c36a8961d3..1d6d51a1ce79 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -104,7 +104,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long address)
 	return 0;
 }

-int in_gate_area_no_task(unsigned long address)
+int in_gate_area_no_mm(unsigned long address)
 {
 	return 0;
 }
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 43c441622c89..835393c85546 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -881,11 +881,11 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr)
 }

 /*
- * Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives:
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
  */
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
 }
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index f849bb29fda1..468d591dde31 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -435,7 +435,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr)
 	return vma && addr >= vma->vm_start && addr < vma->vm_end;
 }

-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5c6d916cd302..9d6efefdde50 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1580,11 +1580,11 @@ static inline bool kernel_page_present(struct page *page) { return true; }
 extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
 #ifdef __HAVE_ARCH_GATE_AREA
-int in_gate_area_no_task(unsigned long addr);
+int in_gate_area_no_mm(unsigned long addr);
 int in_gate_area(struct mm_struct *mm, unsigned long addr);
 #else
-int in_gate_area_no_task(unsigned long addr);
-#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_task(addr);})
+int in_gate_area_no_mm(unsigned long addr);
+#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
 #endif	/* __HAVE_ARCH_GATE_AREA */

 int drop_caches_sysctl_handler(struct ctl_table *, int,
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f6d091b5757..b9d0fd1d21c7 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
 	if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
 	    arch_is_kernel_text(addr))
 		return 1;
-	return in_gate_area_no_task(addr);
+	return in_gate_area_no_mm(addr);
 }

 static inline int is_kernel(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
 		return 1;
-	return in_gate_area_no_task(addr);
+	return in_gate_area_no_mm(addr);
 }

 static int is_ksym_addr(unsigned long addr)
diff --git a/mm/memory.c b/mm/memory.c
index 931d479b80c2..5f5b5de5a40e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3505,7 +3505,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 #endif
 }

-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 #ifdef AT_SYSINFO_EHDR
 	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
diff --git a/mm/nommu.c b/mm/nommu.c
index f59e1424d3db..e629143f9440 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1963,7 +1963,7 @@ error:
 	return -ENOMEM;
 }

-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
--
cgit v1.2.3

From e7f22e207bacdba5b73f2893a3abe935a5373e2e Mon Sep 17 00:00:00 2001
From: Stephen Wilson
Date: Sun, 13 Mar 2011 15:49:18 -0400
Subject: mm: use mm_struct to resolve gate vma's in __get_user_pages

We now check if a requested user page overlaps a gate vma using the supplied
mm instead of the supplied task.  The given task is now used solely for
accounting purposes and may be NULL.

Signed-off-by: Stephen Wilson
Signed-off-by: Al Viro
---
 mm/memory.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 5f5b5de5a40e..5f585b65d734 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1486,9 +1486,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		struct vm_area_struct *vma;

 		vma = find_extend_vma(mm, start);
-		if (!vma && in_gate_area(tsk->mm, start)) {
+		if (!vma && in_gate_area(mm, start)) {
 			unsigned long pg = start & PAGE_MASK;
-			struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm);
+			struct vm_area_struct *gate_vma = get_gate_vma(mm);
 			pgd_t *pgd;
 			pud_t *pud;
 			pmd_t *pmd;
@@ -1589,10 +1589,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				return i ? i : -EFAULT;
 			BUG();
 		}
-		if (ret & VM_FAULT_MAJOR)
-			tsk->maj_flt++;
-		else
-			tsk->min_flt++;
+
+		if (tsk) {
+			if (ret & VM_FAULT_MAJOR)
+				tsk->maj_flt++;
+			else
+				tsk->min_flt++;
+		}

 		if (ret & VM_FAULT_RETRY) {
 			*nonblocking = 0;
@@ -1638,7 +1641,8 @@ EXPORT_SYMBOL(__get_user_pages);

 /**
  * get_user_pages() - pin user pages in memory
- * @tsk:	task_struct of target task
+ * @tsk:	the task_struct to use for page fault accounting, or
+ *		NULL if faults are not to be recorded.
  * @mm:		mm_struct of target mm
  * @start:	starting user address
  * @nr_pages:	number of pages from start to pin
--
cgit v1.2.3
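A hedged sketch of the usage this enables (hypothetical caller, not part of
the patch; it assumes the eight-argument get_user_pages() signature of this
kernel): pin one page of another process' address space and pass a NULL task,
since only the page is wanted, not fault accounting.

	static struct page *pin_remote_page(struct mm_struct *mm,
					    unsigned long addr)
	{
		struct page *page;
		int ret;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(NULL, mm, addr, 1, 0 /* write */,
				     0 /* force */, &page, NULL);
		up_read(&mm->mmap_sem);

		return ret == 1 ? page : NULL;	/* caller must put_page() later */
	}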
From 206cb636576b969e9b471cdedeaea7752e6acb33 Mon Sep 17 00:00:00 2001
From: Stephen Wilson
Date: Sun, 13 Mar 2011 15:49:19 -0400
Subject: mm: factor out main logic of access_process_vm

Introduce an internal helper __access_remote_vm and base access_process_vm on
top of it.  This new method may be called with a NULL task_struct if page
fault accounting is not desired.  This code will be shared with a new address
space accessor that is independent of task_struct.

Signed-off-by: Stephen Wilson
Signed-off-by: Al Viro
---
 mm/memory.c | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 5f585b65d734..820b4c4810f0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3650,20 +3650,15 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 #endif

 /*
- * Access another process' address space.
- * Source/target buffer must be kernel space,
- * Do not walk the page table directly, use get_user_pages
+ * Access another process' address space as given in mm.  If non-NULL, use the
+ * given task for page fault accounting.
  */
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long addr, void *buf, int len, int write)
 {
-	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	void *old_buf = buf;

-	mm = get_task_mm(tsk);
-	if (!mm)
-		return 0;
-
 	down_read(&mm->mmap_sem);
 	/* ignore errors, just check how much was successfully transferred */
 	while (len) {
@@ -3712,11 +3707,31 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		addr += bytes;
 	}
 	up_read(&mm->mmap_sem);
-	mmput(mm);

 	return buf - old_buf;
 }

+/*
+ * Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr,
+		void *buf, int len, int write)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+	mmput(mm);
+
+	return ret;
+}
+
 /*
  * Print the name of a VMA.
  */
--
cgit v1.2.3

From 5ddd36b9c59887c6416e21daf984fbdd9b1818df Mon Sep 17 00:00:00 2001
From: Stephen Wilson
Date: Sun, 13 Mar 2011 15:49:20 -0400
Subject: mm: implement access_remote_vm

Provide an alternative to access_process_vm that allows the caller to obtain a
reference to the supplied mm_struct.

Signed-off-by: Stephen Wilson
Signed-off-by: Al Viro
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'mm/memory.c')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d6efefdde50..60011d26bffc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -971,6 +971,8 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
 		void *buf, int len, int write);
+extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
+		void *buf, int len, int write);

 int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long start, int len, unsigned int foll_flags,
diff --git a/mm/memory.c b/mm/memory.c
index 820b4c4810f0..468f5076754c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3711,6 +3711,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 	return buf - old_buf;
 }

+/**
+ * @access_remote_vm - access another process' address space
+ * @mm:		the mm_struct of the target address space
+ * @addr:	start address to access
+ * @buf:	source or destination buffer
+ * @len:	number of bytes to transfer
+ * @write:	whether the access is a write
+ *
+ * The caller must hold a reference on @mm.
+ */
+int access_remote_vm(struct mm_struct *mm, unsigned long addr,
+		void *buf, int len, int write)
+{
+	return __access_remote_vm(NULL, mm, addr, buf, len, write);
+}
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
  * Do not walk the page table directly, use get_user_pages
--
cgit v1.2.3
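A hedged sketch of how a caller might use the new accessor (hypothetical
helper, not part of the patch; it takes the required mm reference itself via
get_task_mm()):

	/* Read bytes from a foreign address space; returns bytes copied. */
	static int read_remote(struct task_struct *task, unsigned long addr,
			       void *buf, int len)
	{
		struct mm_struct *mm = get_task_mm(task);
		int copied;

		if (!mm)
			return -EINVAL;

		copied = access_remote_vm(mm, addr, buf, len, 0 /* read */);
		mmput(mm);

		return copied;
	}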
From 56039efa18f2530fc23e8ef19e716b65ee2a1d1e Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki
Date: Wed, 23 Mar 2011 16:42:19 -0700
Subject: memcg: fix ugly initialization of return value is in caller

Remove the initialization of a variable in the callers of memory cgroup
functions.  It is actually the return value of the memcg function, but it is
initialized in the caller.

Some memory cgroup code uses the following style to carry the result of a
_start function to the matching _end function, to avoid races:

	mem_cgroup_start_A(&(*ptr))
	/* Something very complicated can happen here. */
	mem_cgroup_end_A(*ptr)

In some calls, *ptr should be initialized to NULL by the caller, which is
ugly.  This patch makes the _start function initialize *ptr instead.

Signed-off-by: KAMEZAWA Hiroyuki
Acked-by: Johannes Weiner
Acked-by: Daisuke Nishimura
Cc: Balbir Singh
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memcontrol.c | 8 ++++++--
 mm/memory.c     | 2 +-
 mm/migrate.c    | 2 +-
 mm/swapfile.c   | 2 +-
 4 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e1ee6ad9c971..b56bd74b486f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2475,7 +2475,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	/* shmem */
 	if (PageSwapCache(page)) {
-		struct mem_cgroup *mem = NULL;
+		struct mem_cgroup *mem;

 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
 		if (!ret)
@@ -2501,6 +2501,8 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	struct mem_cgroup *mem;
 	int ret;

+	*ptr = NULL;
+
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2916,6 +2918,8 @@ int mem_cgroup_prepare_migration(struct page *page,
 	enum charge_type ctype;
 	int ret = 0;

+	*ptr = NULL;
+
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return 0;
@@ -3058,7 +3062,7 @@ int mem_cgroup_shmem_charge_fallback(struct page *page,
 			struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *mem;
 	int ret;

 	if (mem_cgroup_disabled())
diff --git a/mm/memory.c b/mm/memory.c
index 615be5127ce1..20d5f7499ce2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2767,7 +2767,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	swp_entry_t entry;
 	pte_t pte;
 	int locked;
-	struct mem_cgroup *ptr = NULL;
+	struct mem_cgroup *ptr;
 	int exclusive = 0;
 	int ret = 0;
diff --git a/mm/migrate.c b/mm/migrate.c
index 89e5c3fe8bbc..b0406d739ea7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -633,7 +633,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int remap_swapcache = 1;
 	int charge = 0;
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;

 	if (!newpage)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 71b42ec55b78..039e61677635 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -880,7 +880,7 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	struct mem_cgroup *ptr = NULL;
+	struct mem_cgroup *ptr;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
--
cgit v1.2.3

From ae91dbfc9949cf042c45798557b48d3b83bc3635 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Sat, 26 Mar 2011 13:27:01 -0700
Subject: mm: fix memory.c incorrect kernel-doc

Fix mm/memory.c incorrect kernel-doc function notation:

  Warning(mm/memory.c:3718): Cannot understand  * @access_remote_vm - access another process' address space
  on line 3718 - I thought it was a doc line

Signed-off-by: Randy Dunlap
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 51a5c23704af..9da8cab1b1b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3715,7 +3715,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 }

 /**
- * @access_remote_vm - access another process' address space
+ * access_remote_vm - access another process' address space
  * @mm:		the mm_struct of the target address space
  * @addr:	start address to access
  * @buf:	source or destination buffer
--
cgit v1.2.3