From 02fbd139759feb1f331cebd858523b5d774082e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 May 2016 11:58:48 +0200 Subject: dax: Remove complete_unwritten argument Fault handlers currently take complete_unwritten argument to convert unwritten extents after PTEs are updated. However no filesystem uses this anymore as the code is racy. Remove the unused argument. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/ext2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext2') diff --git a/fs/ext2/file.c b/fs/ext2/file.c index c1400b109805..868c02317b05 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -51,7 +51,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } down_read(&ei->dax_sem); - ret = __dax_fault(vma, vmf, ext2_get_block, NULL); + ret = __dax_fault(vma, vmf, ext2_get_block); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) @@ -72,7 +72,7 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, } down_read(&ei->dax_sem); - ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block); up_read(&ei->dax_sem); if (flags & FAULT_FLAG_WRITE) -- cgit v1.2.3 From 9b6cd5f76d60b563d75e55e432e03ed134761432 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 May 2016 17:17:04 +0200 Subject: ext2: Fix block zeroing in ext2_get_blocks() for DAX When zeroing allocated blocks for DAX, we accidentally zeroed only the first allocated block instead of all of them. So far this problem is hidden by the fact that page faults always need only a single block and DAX write code zeroes blocks again. But the zeroing in DAX code is racy and needs to be removed so fix the zeroing in ext2 to zero all allocated blocks. Reported-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/ext2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext2') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6bd58e6ff038..038d0ed5f565 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -740,7 +740,7 @@ static int ext2_get_blocks(struct inode *inode, err = dax_clear_sectors(inode->i_sb->s_bdev, le32_to_cpu(chain[depth-1].key) << (inode->i_blkbits - 9), - 1 << inode->i_blkbits); + count << inode->i_blkbits); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; -- cgit v1.2.3 From 86b0624e42d03a424e9571b8591d191c436f9af1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 May 2016 11:58:49 +0200 Subject: ext2: Avoid DAX zeroing to corrupt data Currently ext2 zeroes any data blocks allocated for DAX inode however it still returns them as BH_New. Thus DAX code zeroes them again in dax_insert_mapping() which can possibly overwrite the data that has been already stored to those blocks by a racing dax_io(). Avoid marking pre-zeroed buffers as new. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/ext2/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext2') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 038d0ed5f565..9a14af3b1a69 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -745,11 +745,11 @@ static int ext2_get_blocks(struct inode *inode, mutex_unlock(&ei->truncate_mutex); goto cleanup; } - } + } else + set_buffer_new(bh_result); ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); - set_buffer_new(bh_result); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) -- cgit v1.2.3 From 284854be2b85e967d17c098a1d4c176b5cd37eb3 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 10 May 2016 10:23:55 -0600 Subject: ext2: Add alignment check for DAX mount When a partition is not aligned by 4KB, mount -o dax succeeds, but any read/write access to the filesystem fails, except for metadata update. Call bdev_dax_supported() to perform proper precondition checks which includes this partition alignment check. Signed-off-by: Toshi Kani Reviewed-by: Jan Kara Cc: Jan Kara Cc: Dan Williams Cc: Ross Zwisler Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Vishal Verma --- fs/ext2/super.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/ext2') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b78caf25f746..1d9379568aa8 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -922,16 +922,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { - if (blocksize != PAGE_SIZE) { - ext2_msg(sb, KERN_ERR, - "error: unsupported blocksize for dax"); + err = bdev_dax_supported(sb, blocksize); + if (err) goto failed_mount; - } - if (!sb->s_bdev->bd_disk->fops->direct_access) { - ext2_msg(sb, KERN_ERR, - "error: device does not support dax"); - goto failed_mount; - } } /* If the blocksize doesn't match, re-read the thing.. */ -- cgit v1.2.3 From 3dc29161070ab14d065554c0ad58988ab77a7bfd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 15 Mar 2016 11:20:41 -0600 Subject: dax: use sb_issue_zerout instead of calling dax_clear_sectors dax_clear_sectors() cannot handle poisoned blocks. These must be zeroed using the BIO interface instead. Convert ext2 and XFS to use only sb_issue_zerout(). Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox [vishal: Also remove the dax_clear_sectors function entirely] Signed-off-by: Vishal Verma --- fs/dax.c | 32 -------------------------------- fs/ext2/inode.c | 8 ++++---- fs/xfs/xfs_bmap_util.c | 15 ++++----------- include/linux/dax.h | 1 - 4 files changed, 8 insertions(+), 48 deletions(-) (limited to 'fs/ext2') diff --git a/fs/dax.c b/fs/dax.c index d602410d8e52..0abbbb62981e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -87,38 +87,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n) return page; } -/* - * dax_clear_sectors() is called from within transaction context from XFS, - * and hence this means the stack from this point must follow GFP_NOFS - * semantics for all operations. - */ -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) -{ - struct blk_dax_ctl dax = { - .sector = _sector, - .size = _size, - }; - - might_sleep(); - do { - long count, sz; - - count = dax_map_atomic(bdev, &dax); - if (count < 0) - return count; - sz = min_t(long, count, SZ_128K); - clear_pmem(dax.addr, sz); - dax.size -= sz; - dax.sector += sz / 512; - dax_unmap_atomic(bdev, &dax); - cond_resched(); - } while (dax.size); - - wmb_pmem(); - return 0; -} -EXPORT_SYMBOL_GPL(dax_clear_sectors); - static bool buffer_written(struct buffer_head *bh) { return buffer_mapped(bh) && !buffer_unwritten(bh); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 9a14af3b1a69..17cbd6b696f2 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -737,10 +738,9 @@ static int ext2_get_blocks(struct inode *inode, * so that it's not found by another thread before it's * initialised */ - err = dax_clear_sectors(inode->i_sb->s_bdev, - le32_to_cpu(chain[depth-1].key) << - (inode->i_blkbits - 9), - count << inode->i_blkbits); + err = sb_issue_zeroout(inode->i_sb, + le32_to_cpu(chain[depth-1].key), count, + GFP_NOFS); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3b6309865c65..930ac6a17ce3 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -72,18 +72,11 @@ xfs_zero_extent( struct xfs_mount *mp = ip->i_mount; xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); sector_t block = XFS_BB_TO_FSBT(mp, sector); - ssize_t size = XFS_FSB_TO_B(mp, count_fsb); - - if (IS_DAX(VFS_I(ip))) - return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), - sector, size); - - /* - * let the block layer decide on the fastest method of - * implementing the zeroing. - */ - return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); + return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), + block << (mp->m_super->s_blocksize_bits - 9), + count_fsb << (mp->m_super->s_blocksize_bits - 9), + GFP_NOFS, true); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index 7c45ac7ea1d1..7f853ffaa987 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,7 +7,6 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); -- cgit v1.2.3