From 17879e8f865f4ed8b6f9041a2687ad40f13ae460 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:50 +1000 Subject: xfs: fix locking for DAX writes So far DAX writes inherited the locking from direct I/O writes, but the direct I/O model of using shared locks for writes is actually wrong for DAX. For direct I/O we're out of any standards and don't have to provide the Posix required exclusion between writers, but for DAX which gets transparently enable on applications without any knowledge of it we can't simply drop the requirement. Even worse this only happens for aligned writes and thus doesn't show up for many typical use cases. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e612a0233710..62649ccdbb4d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -714,24 +714,11 @@ xfs_file_dax_write( struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - int unaligned_io = 0; - int iolock; + int iolock = XFS_IOLOCK_EXCL; struct iov_iter data; - /* "unaligned" here means not aligned to a filesystem block */ - if ((iocb->ki_pos & mp->m_blockmask) || - ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) { - unaligned_io = 1; - iolock = XFS_IOLOCK_EXCL; - } else if (mapping->nrpages) { - iolock = XFS_IOLOCK_EXCL; - } else { - iolock = XFS_IOLOCK_SHARED; - } xfs_rw_ilock(ip, iolock); - ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; @@ -758,11 +745,6 @@ xfs_file_dax_write( WARN_ON_ONCE(ret); } - if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) { - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - iolock = XFS_IOLOCK_SHARED; - } - trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); data = *from; -- cgit v1.2.3 From 6c31f495d19975b7d2e824ee614934d5db113afe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:28:38 +1000 Subject: xfs: use iomap to implement DAX Another users of buffer_heads bytes the dust. Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 61 +++++++++++++++--------------------------------------- fs/xfs/xfs_iomap.c | 11 ++++++---- 2 files changed, 24 insertions(+), 48 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 62649ccdbb4d..f99d7fac5abf 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -332,10 +332,7 @@ xfs_file_dax_read( struct kiocb *iocb, struct iov_iter *to) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct iov_iter data = *to; + struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); size_t count = iov_iter_count(to); ssize_t ret = 0; @@ -345,11 +342,7 @@ xfs_file_dax_read( return 0; /* skip atime */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(to, ret); - } + ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -711,52 +704,32 @@ xfs_file_dax_write( struct kiocb *iocb, struct iov_iter *from) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - ssize_t ret = 0; int iolock = XFS_IOLOCK_EXCL; - struct iov_iter data; + ssize_t ret, error = 0; + size_t count; + loff_t pos; xfs_rw_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. dax_do_io() should really do this invalidation internally as - * it will know if we've allocated over a holei for this specific IO and - * if so it needs to update the mapping tree and invalidate existing - * PTEs over the newly allocated range. Remove this invalidation when - * dax_do_io() is fixed up. - */ - if (mapping->nrpages) { - loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; - - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, - end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } + pos = iocb->ki_pos; + count = iov_iter_count(from); - trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); + trace_xfs_file_dax_write(ip, count, pos); - data = *from; - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(from, ret); + ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops); + if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { + i_size_write(inode, iocb->ki_pos); + error = xfs_setfilesize(ip, pos, ret); } + out: xfs_rw_iunlock(ip, iolock); - return ret; + return error ? error : ret; } STATIC ssize_t @@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else { ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); @@ -1529,7 +1502,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fe4a26d752d0..c08253e11545 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -934,11 +934,13 @@ error_on_bmapi_transaction: return error; } -static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps) +static inline bool imap_needs_alloc(struct inode *inode, + struct xfs_bmbt_irec *imap, int nimaps) { return !nimaps || imap->br_startblock == HOLESTARTBLOCK || - imap->br_startblock == DELAYSTARTBLOCK; + imap->br_startblock == DELAYSTARTBLOCK || + (IS_DAX(inode) && ISUNWRITTEN(imap)); } static int @@ -959,7 +961,8 @@ xfs_file_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) { + if ((flags & IOMAP_WRITE) && + !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { return xfs_file_iomap_begin_delay(inode, offset, length, flags, iomap); } @@ -979,7 +982,7 @@ xfs_file_iomap_begin( return error; } - if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) { + if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { /* * We cap the maximum length we map here to MAX_WRITEBACK_PAGES * pages to keep the chunks of work done where somewhat symmetric -- cgit v1.2.3