summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2025-01-09 11:31:04 +0300
committerChristian Brauner <brauner@kernel.org>2025-01-09 18:23:18 +0300
commit7422bbd030210fbdc011acfd6f0f15b966fd2b46 (patch)
tree6886bcb1e9b20da494d24d21764c02ea7eae344c
parent7e17483c7b151ed64f8959278def2fea164526f5 (diff)
downloadlinux-7422bbd030210fbdc011acfd6f0f15b966fd2b46.tar.xz
xfs: report the correct read/write dio alignment for reflinked inodes
For I/O to reflinked blocks we always need to write an entire new file system block, and the code enforces the file system block alignment for the entire file if it has any reflinked blocks. Use the new STATX_DIO_READ_ALIGN flag to report the asymmetric read vs write alignments for reflinked files. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250109083109.1441561-5-hch@lst.de Reviewed-by: John Garry <john.g.garry@oracle.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/xfs/xfs_iops.c21
1 files changed, 18 insertions, 3 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 6b0228a21617..40289fe6f5b2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -580,9 +580,24 @@ xfs_report_dioalign(
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct block_device *bdev = target->bt_bdev;
- stat->result_mask |= STATX_DIOALIGN;
+ stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
+
+ /*
+ * For COW inodes, we can only perform out of place writes of entire
+ * allocation units (blocks or RT extents).
+ * For writes smaller than the allocation unit, we must fall back to
+ * buffered I/O to perform read-modify-write cycles. At best this is
+ * highly inefficient; at worst it leads to page cache invalidation
+ * races. Tell applications to avoid this by reporting the larger write
+ * alignment in dio_offset_align, and the smaller read alignment in
+ * dio_read_offset_align.
+ */
+ stat->dio_read_offset_align = bdev_logical_block_size(bdev);
+ if (xfs_is_cow_inode(ip))
+ stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
+ else
+ stat->dio_offset_align = stat->dio_read_offset_align;
}
static void
@@ -658,7 +673,7 @@ xfs_vn_getattr(
stat->rdev = inode->i_rdev;
break;
case S_IFREG:
- if (request_mask & STATX_DIOALIGN)
+ if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
xfs_report_dioalign(ip, stat);
if (request_mask & STATX_WRITE_ATOMIC)
xfs_report_atomic_write(ip, stat);