diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 238 |
1 files changed, 187 insertions, 51 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 5b4dd03130da..d0e4d1002059 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -135,10 +135,9 @@ static int page_cache_tree_insert(struct address_space *mapping, } else { /* DAX can replace empty locked entry with a hole */ WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); + dax_radix_locked_entry(0, RADIX_DAX_EMPTY)); /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, + dax_wake_mapping_entry_waiter(mapping, page->index, p, false); } } @@ -740,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc); * at a cost of "thundering herd" phenomena during rare hash * collisions. */ -wait_queue_head_t *page_waitqueue(struct page *page) +#define PAGE_WAIT_TABLE_BITS 8 +#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS) +static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned; + +static wait_queue_head_t *page_waitqueue(struct page *page) { - return bit_waitqueue(page, 0); + return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)]; } -EXPORT_SYMBOL(page_waitqueue); -void wait_on_page_bit(struct page *page, int bit_nr) +void __init pagecache_init(void) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + int i; + + for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++) + init_waitqueue_head(&page_wait_table[i]); - if (test_bit(bit_nr, &page->flags)) - __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, - TASK_UNINTERRUPTIBLE); + page_writeback_init(); } -EXPORT_SYMBOL(wait_on_page_bit); -int wait_on_page_bit_killable(struct page *page, int bit_nr) +struct wait_page_key { + struct page *page; + int bit_nr; + int page_match; +}; + +struct wait_page_queue { + struct page *page; + int bit_nr; + wait_queue_t wait; +}; + +static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + struct wait_page_key *key = arg; + struct wait_page_queue *wait_page + = 
container_of(wait, struct wait_page_queue, wait); + + if (wait_page->page != key->page) + return 0; + key->page_match = 1; - if (!test_bit(bit_nr, &page->flags)) + if (wait_page->bit_nr != key->bit_nr) + return 0; + if (test_bit(key->bit_nr, &key->page->flags)) return 0; - return __wait_on_bit(page_waitqueue(page), &wait, - bit_wait_io, TASK_KILLABLE); + return autoremove_wake_function(wait, mode, sync, key); } -int wait_on_page_bit_killable_timeout(struct page *page, - int bit_nr, unsigned long timeout) +void wake_up_page_bit(struct page *page, int bit_nr) { - DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); + wait_queue_head_t *q = page_waitqueue(page); + struct wait_page_key key; + unsigned long flags; - wait.key.timeout = jiffies + timeout; - if (!test_bit(bit_nr, &page->flags)) - return 0; - return __wait_on_bit(page_waitqueue(page), &wait, - bit_wait_io_timeout, TASK_KILLABLE); + key.page = page; + key.bit_nr = bit_nr; + key.page_match = 0; + + spin_lock_irqsave(&q->lock, flags); + __wake_up_locked_key(q, TASK_NORMAL, &key); + /* + * It is possible for other pages to have collided on the waitqueue + * hash, so in that case check for a page match. That prevents a long- + * term waiter on another page from keeping PageWaiters set here. + * + * It is still possible to miss a case here, when we woke page waiters + * and removed them from the waitqueue, but there are still other + * page waiters. + */ + if (!waitqueue_active(q) || !key.page_match) { + ClearPageWaiters(page); + /* + * It's possible to miss clearing Waiters here, when we woke + * our page waiters, but the hashed waitqueue has waiters for + * other pages on it. + * + * That's okay, it's a rare case. The next waker will clear it. 
+ */ + } + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL(wake_up_page_bit); + +static inline int wait_on_page_bit_common(wait_queue_head_t *q, + struct page *page, int bit_nr, int state, bool lock) +{ + struct wait_page_queue wait_page; + wait_queue_t *wait = &wait_page.wait; + int ret = 0; + + init_wait(wait); + wait->func = wake_page_function; + wait_page.page = page; + wait_page.bit_nr = bit_nr; + + for (;;) { + spin_lock_irq(&q->lock); + + if (likely(list_empty(&wait->task_list))) { + if (lock) + __add_wait_queue_tail_exclusive(q, wait); + else + __add_wait_queue(q, wait); + SetPageWaiters(page); + } + + set_current_state(state); + + spin_unlock_irq(&q->lock); + + if (likely(test_bit(bit_nr, &page->flags))) { + io_schedule(); + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + break; + } + } + + if (lock) { + if (!test_and_set_bit_lock(bit_nr, &page->flags)) + break; + } else { + if (!test_bit(bit_nr, &page->flags)) + break; + } + } + + finish_wait(q, wait); + + /* + * A signal could leave PageWaiters set. Clearing it here if + * !waitqueue_active would be possible (by open-coding finish_wait), + * but still fail to catch it in the case of wait hash collision. We + * already can fail to clear wait hash collision cases, so don't + * bother with signals either. 
+ */ + + return ret; +} + +void wait_on_page_bit(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false); +} +EXPORT_SYMBOL(wait_on_page_bit); + +int wait_on_page_bit_killable(struct page *page, int bit_nr) +{ + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false); } -EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue @@ -794,10 +907,34 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter) spin_lock_irqsave(&q->lock, flags); __add_wait_queue(q, waiter); + SetPageWaiters(page); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL_GPL(add_page_wait_queue); +#ifndef clear_bit_unlock_is_negative_byte + +/* + * PG_waiters is the high bit in the same byte as PG_locked. + * + * On x86 (and on many other architectures), we can clear PG_locked and + * test the sign bit at the same time. But if the architecture does + * not support that special operation, we just do this all by hand + * instead. + * + * The read of PG_waiters has to be after (or concurrently with) PG_locked + * being cleared, but a memory barrier should be unnecessary since it is + * in the same byte as PG_locked. + */ +static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem) +{ + clear_bit_unlock(nr, mem); + /* smp_mb__after_atomic(); */ + return test_bit(PG_waiters, mem); +} + +#endif + /** * unlock_page - unlock a locked page * @page: the page @@ -807,16 +944,19 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). 
+ * Note that this depends on PG_waiters being the sign bit in the byte + * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to + * clear the PG_locked bit and test PG_waiters at the same time fairly + * portably (architectures that do LL/SC can test any bit, while x86 can + * test the sign bit). */ void unlock_page(struct page *page) { + BUILD_BUG_ON(PG_waiters != 7); page = compound_head(page); VM_BUG_ON_PAGE(!PageLocked(page), page); - clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_atomic(); - wake_up_page(page, PG_locked); + if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags)) + wake_up_page_bit(page, PG_locked); } EXPORT_SYMBOL(unlock_page); @@ -875,23 +1015,19 @@ EXPORT_SYMBOL_GPL(page_endio); * __lock_page - get a lock on the page, assuming we need to sleep to get it * @page: the page to lock */ -void __lock_page(struct page *page) +void __lock_page(struct page *__page) { - struct page *page_head = compound_head(page); - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); - - __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io, - TASK_UNINTERRUPTIBLE); + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true); } EXPORT_SYMBOL(__lock_page); -int __lock_page_killable(struct page *page) +int __lock_page_killable(struct page *__page) { - struct page *page_head = compound_head(page); - DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); - - return __wait_on_bit_lock(page_waitqueue(page_head), &wait, - bit_wait_io, TASK_KILLABLE); + struct page *page = compound_head(__page); + wait_queue_head_t *q = page_waitqueue(page); + return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true); } EXPORT_SYMBOL_GPL(__lock_page_killable); @@ -1638,7 +1774,7 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, int error = 0; if (unlikely(*ppos >= inode->i_sb->s_maxbytes)) - return 
-EINVAL; + return 0; iov_iter_truncate(iter, inode->i_sb->s_maxbytes); index = *ppos >> PAGE_SHIFT; @@ -2165,12 +2301,12 @@ page_not_uptodate: } EXPORT_SYMBOL(filemap_fault); -void filemap_map_pages(struct fault_env *fe, +void filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { struct radix_tree_iter iter; void **slot; - struct file *file = fe->vma->vm_file; + struct file *file = vmf->vma->vm_file; struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; loff_t size; @@ -2226,11 +2362,11 @@ repeat: if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; - fe->address += (iter.index - last_pgoff) << PAGE_SHIFT; - if (fe->pte) - fe->pte += iter.index - last_pgoff; + vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT; + if (vmf->pte) + vmf->pte += iter.index - last_pgoff; last_pgoff = iter.index; - if (alloc_set_pte(fe, NULL, page)) + if (alloc_set_pte(vmf, NULL, page)) goto unlock; unlock_page(page); goto next; @@ -2240,7 +2376,7 @@ skip: put_page(page); next: /* Huge page is mapped? No need to proceed. */ - if (pmd_trans_huge(*fe->pmd)) + if (pmd_trans_huge(*vmf->pmd)) break; if (iter.index == end_pgoff) break; |