diff options
author | Christian Brauner <brauner@kernel.org> | 2025-01-09 18:23:26 +0300 |
---|---|---|
committer | Christian Brauner <brauner@kernel.org> | 2025-01-09 18:23:26 +0300 |
commit | cf40ebb2ed9fde24195260637e00e47a6f0e7c15 (patch) | |
tree | 30d739022ac9055134a15eaf74f36dc34df677bf | |
parent | 40384c840ea1944d7c5a392e8975ed088ecf0b37 (diff) | |
parent | 468210ec76e155bbc53f8fc41b2bd5e26a2f6d20 (diff) | |
download | linux-cf40ebb2ed9fde24195260637e00e47a6f0e7c15.tar.xz |
Merge patch series "add STATX_DIO_READ_ALIGN v3"
Christoph Hellwig <hch@lst.de> says:
File systems that write out of place usually require different alignment
for direct I/O writes than what they can do for reads. This series tries
to address this by adding yet another statx field.
* patches from https://lore.kernel.org/r/20250109083109.1441561-1-hch@lst.de:
xfs: report larger dio alignment for COW inodes
xfs: report the correct read/write dio alignment for reflinked inodes
xfs: cleanup xfs_vn_getattr
fs: add STATX_DIO_READ_ALIGN
fs: reformat the statx definition
Link: https://lore.kernel.org/r/20250109083109.1441561-1-hch@lst.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r-- | fs/stat.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 62 | ||||
-rw-r--r-- | include/linux/stat.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/stat.h | 99 |
5 files changed, 125 insertions, 49 deletions
diff --git a/fs/stat.c b/fs/stat.c index 0870e969a8a0..2c0e111a098a 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; + tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; tmp.stx_subvol = stat->subvol; tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0789c18aaa18..f95103325318 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1204,7 +1204,16 @@ xfs_file_ioctl( struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct dioattr da; - da.d_mem = da.d_miniosz = target->bt_logical_sectorsize; + da.d_mem = target->bt_logical_sectorsize; + + /* + * See xfs_report_dioalign() for an explanation about why this + * reports a value larger than the sector size for COW inodes. + */ + if (xfs_is_cow_inode(ip)) + da.d_miniosz = xfs_inode_alloc_unitsize(ip); + else + da.d_miniosz = target->bt_logical_sectorsize; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 207e0dadffc3..40289fe6f5b2 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -573,17 +573,43 @@ xfs_stat_blksize( } static void -xfs_get_atomic_write_attr( +xfs_report_dioalign( struct xfs_inode *ip, - unsigned int *unit_min, - unsigned int *unit_max) + struct kstat *stat) { - if (!xfs_inode_can_atomicwrite(ip)) { - *unit_min = *unit_max = 0; - return; - } + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + struct block_device *bdev = target->bt_bdev; + + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; - *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize; + /* + * For COW inodes, we can only perform out of place writes of entire + * allocation units (blocks or RT extents). + * For writes smaller than the allocation unit, we must fall back to + * buffered I/O to perform read-modify-write cycles. At best this is + * highly inefficient; at worst it leads to page cache invalidation + * races. Tell applications to avoid this by reporting the larger write + * alignment in dio_offset_align, and the smaller read alignment in + * dio_read_offset_align. + */ + stat->dio_read_offset_align = bdev_logical_block_size(bdev); + if (xfs_is_cow_inode(ip)) + stat->dio_offset_align = xfs_inode_alloc_unitsize(ip); + else + stat->dio_offset_align = stat->dio_read_offset_align; +} + +static void +xfs_report_atomic_write( + struct xfs_inode *ip, + struct kstat *stat) +{ + unsigned int unit_min = 0, unit_max = 0; + + if (xfs_inode_can_atomicwrite(ip)) + unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize; + generic_fill_statx_atomic_writes(stat, unit_min, unit_max); } STATIC int @@ -647,22 +673,10 @@ xfs_vn_getattr( stat->rdev = inode->i_rdev; break; case S_IFREG: - if (request_mask & STATX_DIOALIGN) { - struct xfs_buftarg *target = xfs_inode_buftarg(ip); - struct block_device *bdev = target->bt_bdev; - - stat->result_mask |= STATX_DIOALIGN; - stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; - stat->dio_offset_align = bdev_logical_block_size(bdev); - } - if (request_mask & STATX_WRITE_ATOMIC) { - unsigned int unit_min, unit_max; - - xfs_get_atomic_write_attr(ip, &unit_min, - &unit_max); - generic_fill_statx_atomic_writes(stat, - unit_min, unit_max); - } + if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) + xfs_report_dioalign(ip, stat); + if (request_mask & STATX_WRITE_ATOMIC) + xfs_report_atomic_write(ip, stat); fallthrough; default: stat->blksize = xfs_stat_blksize(ip); diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c..9d8382e23a9c 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,7 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u32 dio_read_offset_align; u64 change_cookie; u64 subvol; u32 atomic_write_unit_min; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ |