diff options
Diffstat (limited to 'fs/dax.c')
-rw-r--r-- | fs/dax.c | 86 |
1 files changed, 44 insertions, 42 deletions
@@ -29,6 +29,11 @@ #include <linux/uio.h> #include <linux/vmstat.h> +/* + * dax_clear_blocks() is called from within transaction context from XFS, + * and hence this means the stack from this point must follow GFP_NOFS + * semantics for all operations. + */ int dax_clear_blocks(struct inode *inode, sector_t block, long size) { struct block_device *bdev = inode->i_sb->s_bdev; @@ -169,8 +174,10 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, else len = iov_iter_zero(max - pos, iter); - if (!len) + if (!len) { + retval = -EFAULT; break; + } pos += len; addr += len; @@ -285,6 +292,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh, static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, struct vm_area_struct *vma, struct vm_fault *vmf) { + struct address_space *mapping = inode->i_mapping; sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); unsigned long vaddr = (unsigned long)vmf->virtual_address; void __pmem *addr; @@ -292,6 +300,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, pgoff_t size; int error; + i_mmap_lock_read(mapping); + /* * Check truncate didn't happen while we were allocating a block. * If it did, this block may or may not be still allocated to the @@ -321,6 +331,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, error = vm_insert_mixed(vma, vaddr, pfn); out: + i_mmap_unlock_read(mapping); + return error; } @@ -382,17 +394,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, * from a read fault and we've raced with a truncate */ error = -EIO; - goto unlock; + goto unlock_page; } - } else { - i_mmap_lock_write(mapping); } error = get_block(inode, block, &bh, 0); if (!error && (bh.b_size < PAGE_SIZE)) error = -EIO; /* fs corruption? */ if (error) - goto unlock; + goto unlock_page; if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) { if (vmf->flags & FAULT_FLAG_WRITE) { @@ -403,9 +413,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (!error && (bh.b_size < PAGE_SIZE)) error = -EIO; if (error) - goto unlock; + goto unlock_page; } else { - i_mmap_unlock_write(mapping); return dax_load_hole(mapping, page, vmf); } } @@ -417,15 +426,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, else clear_user_highpage(new_page, vaddr); if (error) - goto unlock; + goto unlock_page; vmf->page = page; if (!page) { + i_mmap_lock_read(mapping); /* Check we didn't race with truncate */ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; if (vmf->pgoff >= size) { + i_mmap_unlock_read(mapping); error = -EIO; - goto unlock; + goto out; } } return VM_FAULT_LOCKED; @@ -461,8 +472,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); } - if (!page) - i_mmap_unlock_write(mapping); out: if (error == -ENOMEM) return VM_FAULT_OOM | major; @@ -471,14 +480,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, return VM_FAULT_SIGBUS | major; return VM_FAULT_NOPAGE | major; - unlock: + unlock_page: if (page) { unlock_page(page); page_cache_release(page); - } else { - i_mmap_unlock_write(mapping); } - goto out; } EXPORT_SYMBOL(__dax_fault); @@ -556,10 +562,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, block = (sector_t)pgoff << (PAGE_SHIFT - blkbits); bh.b_size = PMD_SIZE; - i_mmap_lock_write(mapping); length = get_block(inode, block, &bh, write); if (length) return VM_FAULT_SIGBUS; + i_mmap_lock_read(mapping); /* * If the filesystem isn't willing to tell us the length of a hole, @@ -569,36 +575,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) goto fallback; - sector = bh.b_blocknr << (blkbits - 9); - - if (buffer_unwritten(&bh) || buffer_new(&bh)) { - int i; - - length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, - bh.b_size); - if (length < 0) { - result = VM_FAULT_SIGBUS; - goto out; - } - if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) - goto fallback; - - for (i = 0; i < PTRS_PER_PMD; i++) - clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); - wmb_pmem(); - count_vm_event(PGMAJFAULT); - mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); - result |= VM_FAULT_MAJOR; - } - /* * If we allocated new storage, make sure no process has any * zero pages covering this hole */ if (buffer_new(&bh)) { - i_mmap_unlock_write(mapping); + i_mmap_unlock_read(mapping); unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0); - i_mmap_lock_write(mapping); + i_mmap_lock_read(mapping); } /* @@ -635,6 +619,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, result = VM_FAULT_NOPAGE; spin_unlock(ptl); } else { + sector = bh.b_blocknr << (blkbits - 9); length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, bh.b_size); if (length < 0) { @@ -644,15 +629,32 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) goto fallback; + /* + * TODO: teach vmf_insert_pfn_pmd() to support + * 'pte_special' for pmds + */ + if (pfn_valid(pfn)) + goto fallback; + + if (buffer_unwritten(&bh) || buffer_new(&bh)) { + int i; + for (i = 0; i < PTRS_PER_PMD; i++) + clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); + wmb_pmem(); + count_vm_event(PGMAJFAULT); + mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); + result |= VM_FAULT_MAJOR; + } + result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write); } out: + i_mmap_unlock_read(mapping); + if (buffer_unwritten(&bh)) complete_unwritten(&bh, !(result & VM_FAULT_ERROR)); - i_mmap_unlock_write(mapping); - return result; fallback: |