diff options
author | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
---|---|---|
committer | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
commit | 1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch) | |
tree | 76c5b35c2b04eafce86a1a729c02ab705eba44bc /mm/filemap.c | |
parent | ebf24414809200915b9ddf7f109bba7c278c8210 (diff) | |
parent | 3ca4a238106dedc285193ee47f494a6584b6fd2f (diff) | |
download | linux-1809de7e7d37c585e01a1bcc583ea92b78fc759d.tar.xz |
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc
Fix a longstanding bug in the hwmod code that could cause
hardware SYSCONFIG register values to not match the kernel's
idea of what they should be, and that could result in lower
performance during IP block idle entry.
Basic build, boot, and PM test logs are available here:
http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 213 |
1 file changed, 118 insertions, 95 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index bc943867d68c..a8c69c8c0a90 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -101,7 +101,7 @@ * ->tree_lock (page_remove_rmap->set_page_dirty) * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) * ->inode->i_lock (page_remove_rmap->set_page_dirty) - * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat) + * ->memcg->move_lock (page_remove_rmap->lock_page_memcg) * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) @@ -176,11 +176,9 @@ static void page_cache_tree_delete(struct address_space *mapping, /* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold the mapping's tree_lock and - * mem_cgroup_begin_page_stat(). + * is safe. The caller must hold the mapping's tree_lock. */ -void __delete_from_page_cache(struct page *page, void *shadow, - struct mem_cgroup *memcg) +void __delete_from_page_cache(struct page *page, void *shadow) { struct address_space *mapping = page->mapping; @@ -195,6 +193,30 @@ void __delete_from_page_cache(struct page *page, void *shadow, else cleancache_invalidate_page(mapping, page); + VM_BUG_ON_PAGE(page_mapped(page), page); + if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) { + int mapcount; + + pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n", + current->comm, page_to_pfn(page)); + dump_page(page, "still mapped when deleted"); + dump_stack(); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); + + mapcount = page_mapcount(page); + if (mapping_exiting(mapping) && + page_count(page) >= mapcount + 2) { + /* + * All vmas have already been torn down, so it's + * a good bet that actually the page is unmapped, + * and we'd prefer not to leak it: if we're wrong, + * some other bad page check should catch it later. 
+ */ + page_mapcount_reset(page); + atomic_sub(mapcount, &page->_count); + } + } + page_cache_tree_delete(mapping, page, shadow); page->mapping = NULL; @@ -205,7 +227,6 @@ void __delete_from_page_cache(struct page *page, void *shadow, __dec_zone_page_state(page, NR_FILE_PAGES); if (PageSwapBacked(page)) __dec_zone_page_state(page, NR_SHMEM); - VM_BUG_ON_PAGE(page_mapped(page), page); /* * At this point page must be either written or cleaned by truncate. @@ -216,8 +237,7 @@ void __delete_from_page_cache(struct page *page, void *shadow, * anyway will be cleared before returning page into buddy allocator. */ if (WARN_ON_ONCE(PageDirty(page))) - account_page_cleaned(page, mapping, memcg, - inode_to_wb(mapping->host)); + account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); } /** @@ -231,7 +251,6 @@ void __delete_from_page_cache(struct page *page, void *shadow, void delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - struct mem_cgroup *memcg; unsigned long flags; void (*freepage)(struct page *); @@ -240,11 +259,9 @@ void delete_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; - memcg = mem_cgroup_begin_page_stat(page); spin_lock_irqsave(&mapping->tree_lock, flags); - __delete_from_page_cache(page, NULL, memcg); + __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); if (freepage) freepage(page); @@ -446,7 +463,8 @@ int filemap_write_and_wait(struct address_space *mapping) { int err = 0; - if (mapping->nrpages) { + if ((!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional)) { err = filemap_fdatawrite(mapping); /* * Even if the above returned error, the pages may be @@ -482,13 +500,8 @@ int filemap_write_and_wait_range(struct address_space *mapping, { int err = 0; - if (dax_mapping(mapping) && mapping->nrexceptional) { - err = dax_writeback_mapping_range(mapping, lstart, lend); - if 
(err) - return err; - } - - if (mapping->nrpages) { + if ((!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ @@ -532,7 +545,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); - struct mem_cgroup *memcg; unsigned long flags; pgoff_t offset = old->index; @@ -542,9 +554,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->mapping = mapping; new->index = offset; - memcg = mem_cgroup_begin_page_stat(old); spin_lock_irqsave(&mapping->tree_lock, flags); - __delete_from_page_cache(old, NULL, memcg); + __delete_from_page_cache(old, NULL); error = radix_tree_insert(&mapping->page_tree, offset, new); BUG_ON(error); mapping->nrpages++; @@ -557,8 +568,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); spin_unlock_irqrestore(&mapping->tree_lock, flags); - mem_cgroup_end_page_stat(memcg); - mem_cgroup_replace_page(old, new); + mem_cgroup_migrate(old, new); radix_tree_preload_end(); if (freepage) freepage(old); @@ -576,7 +586,7 @@ static int page_cache_tree_insert(struct address_space *mapping, void **slot; int error; - error = __radix_tree_create(&mapping->page_tree, page->index, + error = __radix_tree_create(&mapping->page_tree, page->index, 0, &node, &slot); if (error) return error; @@ -1245,7 +1255,6 @@ unsigned find_get_entries(struct address_space *mapping, return 0; rcu_read_lock(); -restart: radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { struct page *page; repeat: @@ -1253,8 +1262,10 @@ repeat: if (unlikely(!page)) continue; if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) - goto restart; + if 
(radix_tree_deref_retry(page)) { + slot = radix_tree_iter_retry(&iter); + continue; + } /* * A shadow entry of a recently evicted page, a swap * entry from shmem/tmpfs or a DAX entry. Return it @@ -1307,7 +1318,6 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, return 0; rcu_read_lock(); -restart: radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { struct page *page; repeat: @@ -1317,13 +1327,8 @@ repeat: if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - WARN_ON(iter.index); - goto restart; + slot = radix_tree_iter_retry(&iter); + continue; } /* * A shadow entry of a recently evicted page, @@ -1374,7 +1379,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, return 0; rcu_read_lock(); -restart: radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) { struct page *page; repeat: @@ -1385,12 +1389,8 @@ repeat: if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - goto restart; + slot = radix_tree_iter_retry(&iter); + continue; } /* * A shadow entry of a recently evicted page, @@ -1450,7 +1450,6 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, return 0; rcu_read_lock(); -restart: radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, *index, tag) { struct page *page; @@ -1461,12 +1460,8 @@ repeat: if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. 
- */ - goto restart; + slot = radix_tree_iter_retry(&iter); + continue; } /* * A shadow entry of a recently evicted page. @@ -1529,7 +1524,6 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, return 0; rcu_read_lock(); -restart: radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start, tag) { struct page *page; @@ -1539,12 +1533,8 @@ repeat: continue; if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - goto restart; + slot = radix_tree_iter_retry(&iter); + continue; } /* @@ -1649,6 +1639,15 @@ find_page: index, last_index - index); } if (!PageUptodate(page)) { + /* + * See comment in do_read_cache_page on why + * wait_on_page_locked is used to avoid unnecessary + * serialisations and why it's safe. + */ + wait_on_page_locked_killable(page); + if (PageUptodate(page)) + goto page_ok; + if (inode->i_blkbits == PAGE_CACHE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; @@ -1841,15 +1840,16 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) ssize_t retval = 0; loff_t *ppos = &iocb->ki_pos; loff_t pos = *ppos; + size_t count = iov_iter_count(iter); + + if (!count) + goto out; /* skip atime */ if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - size_t count = iov_iter_count(iter); loff_t size; - if (!count) - goto out; /* skip atime */ size = i_size_read(inode); retval = filemap_write_and_wait_range(mapping, pos, pos + count - 1); @@ -1890,6 +1890,7 @@ EXPORT_SYMBOL(generic_file_read_iter); * page_cache_read - adds requested page to the page cache if not already there * @file: file to read * @offset: page index + * @gfp_mask: memory allocation flags * * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O
to read in its contents from disk. @@ -2151,10 +2152,11 @@ repeat: if (unlikely(!page)) goto next; if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) - break; - else - goto next; + if (radix_tree_deref_retry(page)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + goto next; } if (!page_cache_get_speculative(page)) @@ -2283,7 +2285,7 @@ static struct page *wait_on_page_read(struct page *page) return page; } -static struct page *__read_cache_page(struct address_space *mapping, +static struct page *do_read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *, struct page *), void *data, @@ -2305,53 +2307,74 @@ repeat: /* Presumably ENOMEM for radix tree node */ return ERR_PTR(err); } + +filler: err = filler(data, page); if (err < 0) { page_cache_release(page); - page = ERR_PTR(err); - } else { - page = wait_on_page_read(page); + return ERR_PTR(err); } - } - return page; -} - -static struct page *do_read_cache_page(struct address_space *mapping, - pgoff_t index, - int (*filler)(void *, struct page *), - void *data, - gfp_t gfp) -{ - struct page *page; - int err; + page = wait_on_page_read(page); + if (IS_ERR(page)) + return page; + goto out; + } + if (PageUptodate(page)) + goto out; -retry: - page = __read_cache_page(mapping, index, filler, data, gfp); - if (IS_ERR(page)) - return page; + /* + * Page is not up to date and may be locked due to one of the following + * case a: Page is being filled and the page lock is held + * case b: Read/write error clearing the page uptodate status + * case c: Truncation in progress (page locked) + * case d: Reclaim in progress + * + * Case a, the page will be up to date when the page is unlocked. + * There is no need to serialise on the page lock here as the page + * is pinned so the lock gives no additional protection. Even if the + * page is truncated, the data is still valid if PageUptodate as + * it's a read vs truncate race.
+ * Case b, the page will not be up to date + * Case c, the page may be truncated but in itself, the data may still + * be valid after IO completes as it's a read vs truncate race. The + * operation must restart if the page is not uptodate on unlock but + * otherwise serialising on page lock to stabilise the mapping gives + * no additional guarantees to the caller as the page lock is + * released before return. + * Case d, similar to truncation. If reclaim holds the page lock, it + * will be a race with remove_mapping that determines if the mapping + * is valid on unlock but otherwise the data is valid and there is + * no need to serialise with page lock. + * + * As the page lock gives no additional guarantee, we optimistically + * wait on the page to be unlocked and check if it's up to date and + * use the page if it is. Otherwise, the page lock is required to + * distinguish between the different cases. The motivation is that we + * avoid spurious serialisations and wakeups when multiple processes + * wait on the same page for IO to complete. + */ + wait_on_page_locked(page); if (PageUptodate(page)) goto out; + /* Distinguish between all the cases under the safety of the lock */ lock_page(page); + + /* Case c or d, restart the operation */ if (!page->mapping) { unlock_page(page); page_cache_release(page); - goto retry; + goto repeat; } + + /* Someone else locked and filled the page in a very small window */ if (PageUptodate(page)) { unlock_page(page); goto out; } - err = filler(data, page); - if (err < 0) { - page_cache_release(page); - return ERR_PTR(err); - } else { - page = wait_on_page_read(page); - if (IS_ERR(page)) - return page; - } + goto filler; + out: mark_page_accessed(page); return page; |