summaryrefslogtreecommitdiff
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c198
1 files changed, 102 insertions, 96 deletions
diff --git a/mm/memory.c b/mm/memory.c
index fa2f04e0337c..6105f475fa86 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,17 +125,17 @@ core_initcall(init_zero_pfn);
#if defined(SPLIT_RSS_COUNTING)
-static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+void sync_mm_rss(struct mm_struct *mm)
{
int i;
for (i = 0; i < NR_MM_COUNTERS; i++) {
- if (task->rss_stat.count[i]) {
- add_mm_counter(mm, i, task->rss_stat.count[i]);
- task->rss_stat.count[i] = 0;
+ if (current->rss_stat.count[i]) {
+ add_mm_counter(mm, i, current->rss_stat.count[i]);
+ current->rss_stat.count[i] = 0;
}
}
- task->rss_stat.events = 0;
+ current->rss_stat.events = 0;
}
static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
@@ -157,30 +157,7 @@ static void check_sync_rss_stat(struct task_struct *task)
if (unlikely(task != current))
return;
if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
- __sync_task_rss_stat(task, task->mm);
-}
-
-unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
- long val = 0;
-
- /*
- * Don't use task->mm here...for avoiding to use task_get_mm()..
- * The caller must guarantee task->mm is not invalid.
- */
- val = atomic_long_read(&mm->rss_stat.count[member]);
- /*
- * counter is updated in asynchronous manner and may go to minus.
- * But it's never be expected number for users.
- */
- if (val < 0)
- return 0;
- return (unsigned long)val;
-}
-
-void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
-{
- __sync_task_rss_stat(task, mm);
+ sync_mm_rss(task->mm);
}
#else /* SPLIT_RSS_COUNTING */
@@ -661,7 +638,7 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
int i;
if (current->mm == mm)
- sync_mm_rss(current, mm);
+ sync_mm_rss(mm);
for (i = 0; i < NR_MM_COUNTERS; i++)
if (rss[i])
add_mm_counter(mm, i, rss[i]);
@@ -1247,16 +1224,24 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
do {
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd)) {
- if (next-addr != HPAGE_PMD_SIZE) {
+ if (next - addr != HPAGE_PMD_SIZE) {
VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
split_huge_page_pmd(vma->vm_mm, pmd);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
- continue;
+ goto next;
/* fall through */
}
- if (pmd_none_or_clear_bad(pmd))
- continue;
+ /*
+ * Here there can be other concurrent MADV_DONTNEED or
+ * trans huge page faults running, and if the pmd is
+ * none or trans huge it can change under us. This is
+ * because MADV_DONTNEED holds the mmap_sem in read
+ * mode.
+ */
+ if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+ goto next;
next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+next:
cond_resched();
} while (pmd++, addr = next, addr != end);
@@ -1282,10 +1267,10 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
return addr;
}
-static unsigned long unmap_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- struct zap_details *details)
+static void unmap_page_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct zap_details *details)
{
pgd_t *pgd;
unsigned long next;
@@ -1305,8 +1290,47 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
mem_cgroup_uncharge_end();
+}
- return addr;
+
+static void unmap_single_vma(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, unsigned long start_addr,
+ unsigned long end_addr, unsigned long *nr_accounted,
+ struct zap_details *details)
+{
+ unsigned long start = max(vma->vm_start, start_addr);
+ unsigned long end;
+
+ if (start >= vma->vm_end)
+ return;
+ end = min(vma->vm_end, end_addr);
+ if (end <= vma->vm_start)
+ return;
+
+ if (vma->vm_flags & VM_ACCOUNT)
+ *nr_accounted += (end - start) >> PAGE_SHIFT;
+
+ if (unlikely(is_pfn_mapping(vma)))
+ untrack_pfn_vma(vma, 0, 0);
+
+ if (start != end) {
+ if (unlikely(is_vm_hugetlb_page(vma))) {
+ /*
+ * It is undesirable to test vma->vm_file as it
+ * should be non-null for valid hugetlb area.
+ * However, vm_file will be NULL in the error
+ * cleanup path of do_mmap_pgoff. When
+ * hugetlbfs ->mmap method fails,
+ * do_mmap_pgoff() nullifies vma->vm_file
+ * before calling this function to clean up.
+ * Since no pte has actually been setup, it is
+ * safe to do nothing in this case.
+ */
+ if (vma->vm_file)
+ unmap_hugepage_range(vma, start, end, NULL);
+ } else
+ unmap_page_range(tlb, vma, start, end, details);
+ }
}
/**
@@ -1318,8 +1342,6 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
* @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
* @details: details of nonlinear truncation or shared cache invalidation
*
- * Returns the end address of the unmapping (restart addr if interrupted).
- *
* Unmap all pages in the vma list.
*
* Only addresses between `start' and `end' will be unmapped.
@@ -1331,55 +1353,18 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-unsigned long unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
{
- unsigned long start = start_addr;
struct mm_struct *mm = vma->vm_mm;
mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
- for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
- unsigned long end;
-
- start = max(vma->vm_start, start_addr);
- if (start >= vma->vm_end)
- continue;
- end = min(vma->vm_end, end_addr);
- if (end <= vma->vm_start)
- continue;
-
- if (vma->vm_flags & VM_ACCOUNT)
- *nr_accounted += (end - start) >> PAGE_SHIFT;
-
- if (unlikely(is_pfn_mapping(vma)))
- untrack_pfn_vma(vma, 0, 0);
-
- while (start != end) {
- if (unlikely(is_vm_hugetlb_page(vma))) {
- /*
- * It is undesirable to test vma->vm_file as it
- * should be non-null for valid hugetlb area.
- * However, vm_file will be NULL in the error
- * cleanup path of do_mmap_pgoff. When
- * hugetlbfs ->mmap method fails,
- * do_mmap_pgoff() nullifies vma->vm_file
- * before calling this function to clean up.
- * Since no pte has actually been setup, it is
- * safe to do nothing in this case.
- */
- if (vma->vm_file)
- unmap_hugepage_range(vma, start, end, NULL);
-
- start = end;
- } else
- start = unmap_page_range(tlb, vma, start, end, details);
- }
- }
-
+ for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+ unmap_single_vma(tlb, vma, start_addr, end_addr, nr_accounted,
+ details);
mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
- return start; /* which is now the end (or restart) address */
}
/**
@@ -1388,8 +1373,10 @@ unsigned long unmap_vmas(struct mmu_gather *tlb,
* @address: starting address of pages to zap
* @size: number of bytes to zap
* @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * Caller must protect the VMA list
*/
-unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
+void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
struct mm_struct *mm = vma->vm_mm;
@@ -1400,9 +1387,34 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
lru_add_drain();
tlb_gather_mmu(&tlb, mm, 0);
update_hiwater_rss(mm);
- end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+ unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+ tlb_finish_mmu(&tlb, address, end);
+}
+
+/**
+ * zap_page_range_single - remove user pages in a given range
+ * @vma: vm_area_struct holding the applicable pages
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * The range must fit into one VMA.
+ */
+static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size, struct zap_details *details)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct mmu_gather tlb;
+ unsigned long end = address + size;
+ unsigned long nr_accounted = 0;
+
+ lru_add_drain();
+ tlb_gather_mmu(&tlb, mm, 0);
+ update_hiwater_rss(mm);
+ mmu_notifier_invalidate_range_start(mm, address, end);
+ unmap_single_vma(&tlb, vma, address, end, &nr_accounted, details);
+ mmu_notifier_invalidate_range_end(mm, address, end);
tlb_finish_mmu(&tlb, address, end);
- return end;
}
/**
@@ -1423,7 +1435,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
if (address < vma->vm_start || address + size > vma->vm_end ||
!(vma->vm_flags & VM_PFNMAP))
return -1;
- zap_page_range(vma, address, size, NULL);
+ zap_page_range_single(vma, address, size, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
@@ -2447,7 +2459,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
* fails, we just zero-fill it. Live with it.
*/
if (unlikely(!src)) {
- void *kaddr = kmap_atomic(dst, KM_USER0);
+ void *kaddr = kmap_atomic(dst);
void __user *uaddr = (void __user *)(va & PAGE_MASK);
/*
@@ -2458,7 +2470,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
*/
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
clear_page(kaddr);
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
flush_dcache_page(dst);
} else
copy_user_highpage(dst, src, va, vma);
@@ -2770,7 +2782,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
unsigned long start_addr, unsigned long end_addr,
struct zap_details *details)
{
- zap_page_range(vma, start_addr, end_addr - start_addr, details);
+ zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
}
static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
@@ -3611,13 +3623,7 @@ static int __init gate_vma_init(void)
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
- /*
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- gate_vma.vm_flags |= VM_ALWAYSDUMP;
+
return 0;
}
__initcall(gate_vma_init);