summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/stat.c1
-rw-r--r--fs/xfs/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_iops.c62
-rw-r--r--include/linux/stat.h1
-rw-r--r--include/uapi/linux/stat.h99
5 files changed, 125 insertions, 49 deletions
diff --git a/fs/stat.c b/fs/stat.c
index 0870e969a8a0..2c0e111a098a 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
tmp.stx_mnt_id = stat->mnt_id;
tmp.stx_dio_mem_align = stat->dio_mem_align;
tmp.stx_dio_offset_align = stat->dio_offset_align;
+ tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
tmp.stx_subvol = stat->subvol;
tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0789c18aaa18..f95103325318 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1204,7 +1204,16 @@ xfs_file_ioctl(
struct xfs_buftarg *target = xfs_inode_buftarg(ip);
struct dioattr da;
- da.d_mem = da.d_miniosz = target->bt_logical_sectorsize;
+ da.d_mem = target->bt_logical_sectorsize;
+
+ /*
+ * See xfs_report_dioalign() for an explanation about why this
+ * reports a value larger than the sector size for COW inodes.
+ */
+ if (xfs_is_cow_inode(ip))
+ da.d_miniosz = xfs_inode_alloc_unitsize(ip);
+ else
+ da.d_miniosz = target->bt_logical_sectorsize;
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
if (copy_to_user(arg, &da, sizeof(da)))
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 207e0dadffc3..40289fe6f5b2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -573,17 +573,43 @@ xfs_stat_blksize(
}
static void
-xfs_get_atomic_write_attr(
+xfs_report_dioalign(
struct xfs_inode *ip,
- unsigned int *unit_min,
- unsigned int *unit_max)
+ struct kstat *stat)
{
- if (!xfs_inode_can_atomicwrite(ip)) {
- *unit_min = *unit_max = 0;
- return;
- }
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ struct block_device *bdev = target->bt_bdev;
+
+ stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
+ stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+ /*
+ * For COW inodes, we can only perform out of place writes of entire
+ * allocation units (blocks or RT extents).
+ * For writes smaller than the allocation unit, we must fall back to
+ * buffered I/O to perform read-modify-write cycles. At best this is
+ * highly inefficient; at worst it leads to page cache invalidation
+ * races. Tell applications to avoid this by reporting the larger write
+ * alignment in dio_offset_align, and the smaller read alignment in
+ * dio_read_offset_align.
+ */
+ stat->dio_read_offset_align = bdev_logical_block_size(bdev);
+ if (xfs_is_cow_inode(ip))
+ stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
+ else
+ stat->dio_offset_align = stat->dio_read_offset_align;
+}
+
+static void
+xfs_report_atomic_write(
+ struct xfs_inode *ip,
+ struct kstat *stat)
+{
+ unsigned int unit_min = 0, unit_max = 0;
+
+ if (xfs_inode_can_atomicwrite(ip))
+ unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
+ generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
}
STATIC int
@@ -647,22 +673,10 @@ xfs_vn_getattr(
stat->rdev = inode->i_rdev;
break;
case S_IFREG:
- if (request_mask & STATX_DIOALIGN) {
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
- struct block_device *bdev = target->bt_bdev;
-
- stat->result_mask |= STATX_DIOALIGN;
- stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
- stat->dio_offset_align = bdev_logical_block_size(bdev);
- }
- if (request_mask & STATX_WRITE_ATOMIC) {
- unsigned int unit_min, unit_max;
-
- xfs_get_atomic_write_attr(ip, &unit_min,
- &unit_max);
- generic_fill_statx_atomic_writes(stat,
- unit_min, unit_max);
- }
+ if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
+ xfs_report_dioalign(ip, stat);
+ if (request_mask & STATX_WRITE_ATOMIC)
+ xfs_report_atomic_write(ip, stat);
fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 3d900c86981c..9d8382e23a9c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,7 @@ struct kstat {
u64 mnt_id;
u32 dio_mem_align;
u32 dio_offset_align;
+ u32 dio_read_offset_align;
u64 change_cookie;
u64 subvol;
u32 atomic_write_unit_min;
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 stx_subvol; /* Subvolume identifier */
- __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
- __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
/* 0xb0 */
- __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
- __u32 __spare1[1];
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */