From 5a9d929d6e13278df62bd9e3d3ceae8c87ad1eea Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong"
Date: Mon, 8 Jan 2018 10:41:39 -0800
Subject: iomap: report collisions between directio and buffered writes to userspace

If two programs simultaneously try to write to the same part of a file
via direct IO and buffered IO, there's a chance that the post-diowrite
pagecache invalidation will fail on the dirty page. When this happens,
the dio write succeeded, which means that the page cache is no longer
coherent with the disk! Programs are not supposed to mix IO types and
this is a clear case of data corruption, so store an EIO which will be
reflected to userspace during the next fsync. Replace the WARN_ON with
a ratelimited pr_crit so that the developers have /some/ kind of
breadcrumb to track down the offending program(s) and file(s) involved.

Signed-off-by: Darrick J. Wong
Reviewed-by: Liu Bo
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 511fbaabf624..7f8d96d68f34 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2992,6 +2992,7 @@ enum {
 };
 
 void dio_end_io(struct bio *bio);
+void dio_warn_stale_pagecache(struct file *filp);
 
 ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 			     struct block_device *bdev, struct iov_iter *iter,
-- 
cgit v1.2.3

From 01c2e13dcae9757ea987af8933f9fcc6e33f2d7c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong"
Date: Thu, 18 Jan 2018 14:07:53 -0800
Subject: xfs: only grab shared inode locks for source file during reflink

Reflink and dedupe operations remap blocks from a source file into a
destination file. The destination file needs exclusive locks on all
levels because we're updating its block map, but the source file isn't
undergoing any block map changes so we can use a shared lock.

Signed-off-by: Darrick J. Wong
Reviewed-by: Christoph Hellwig
---
 fs/xfs/xfs_reflink.c | 25 +++++++++++++++----------
 include/linux/fs.h | 5 +++++
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index bcc58c24287c..85a119e1463b 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1202,13 +1202,16 @@ xfs_reflink_remap_blocks(
 
 	/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
 	while (len) {
+		uint lock_mode;
+
 		trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
 				dest, destoff);
+
 		/* Read extent from the source file */
 		nimaps = 1;
-		xfs_ilock(src, XFS_ILOCK_EXCL);
+		lock_mode = xfs_ilock_data_map_shared(src);
 		error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
-		xfs_iunlock(src, XFS_ILOCK_EXCL);
+		xfs_iunlock(src, lock_mode);
 		if (error)
 			goto err;
 		ASSERT(nimaps == 1);
@@ -1260,7 +1263,7 @@ xfs_iolock_two_inodes_and_break_layout(
 
 retry:
 	if (src < dest) {
-		inode_lock(src);
+		inode_lock_shared(src);
 		inode_lock_nested(dest, I_MUTEX_NONDIR2);
 	} else {
 		/* src >= dest */
@@ -1271,7 +1274,7 @@ retry:
 	if (error == -EWOULDBLOCK) {
 		inode_unlock(dest);
 		if (src < dest)
-			inode_unlock(src);
+			inode_unlock_shared(src);
 		error = break_layout(dest, true);
 		if (error)
 			return error;
@@ -1280,11 +1283,11 @@ retry:
 	if (error) {
 		inode_unlock(dest);
 		if (src < dest)
-			inode_unlock(src);
+			inode_unlock_shared(src);
 		return error;
 	}
 	if (src > dest)
-		inode_lock_nested(src, I_MUTEX_NONDIR2);
+		inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
 	return 0;
 }
 
@@ -1324,7 +1327,7 @@ xfs_reflink_remap_range(
 	if (same_inode)
 		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
 	else
-		xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest,
+		xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
 				XFS_MMAPLOCK_EXCL);
 
 	/* Check file eligibility and prepare for block sharing. */
@@ -1393,10 +1396,12 @@ xfs_reflink_remap_range(
 			is_dedupe);
 
 out_unlock:
-	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+	xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+	if (!same_inode)
+		xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+	inode_unlock(inode_out);
 	if (!same_inode)
-		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
-	unlock_two_nondirectories(inode_in, inode_out);
+		inode_unlock_shared(inode_in);
 	if (ret)
 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7f8d96d68f34..5cbeab8a63ca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -748,6 +748,11 @@ static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
 	down_write_nested(&inode->i_rwsem, subclass);
 }
 
+static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass)
+{
+	down_read_nested(&inode->i_rwsem, subclass);
+}
+
 void lock_two_nondirectories(struct inode *, struct inode*);
 void unlock_two_nondirectories(struct inode *, struct inode*);
 
-- 
cgit v1.2.3