summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_bmap_util.h
diff options
context:
space:
mode:
authorCarlos Maiolino <cem@kernel.org>2025-03-04 13:25:46 +0300
committerCarlos Maiolino <cem@kernel.org>2025-03-04 13:25:46 +0300
commit4c6283ec9284bb72906dba83bc7a809747e6331e (patch)
tree6a2ed104fc86a90bb787ff0dbee020461e59ec14 /fs/xfs/xfs_bmap_util.h
parent0a1fd78080c8c9a5582e82100bd91b87ae5ac57c (diff)
parent9c477912b2f58da71751f244aceecf5f8cc549ed (diff)
downloadlinux-4c6283ec9284bb72906dba83bc7a809747e6331e.tar.xz
Merge tag 'xfs-zoned-allocator-2025-03-03' of git://git.infradead.org/users/hch/xfs into xfs-6.15-zoned_devices
xfs: add support for zoned devices Add support for the new zoned space allocator and thus for zoned devices: https://zonedstorage.io/docs/introduction/zoned-storage to XFS. This has been developed for and tested on both SMR hard drives, which are the oldest and most common class of zoned devices: https://zonedstorage.io/docs/introduction/smr and ZNS SSDs: https://zonedstorage.io/docs/introduction/zns It has not been tested with zoned UFS devices, as their current capacity points and performance characteristics aren't too interesting for XFS use cases (but never say never). Sequential write only zones are only supported for data using a new allocator for the RT device, which maps each zone to a rtgroup which is written sequentially. All metadata and (for now) the log require using randomly writable space. This means a realtime device is required to support zoned storage, but for the common case of SMR hard drives that contain randomly writable zones and sequential write required zones on the same block device, the concept of an internal RT device is added which means using XFS on an SMR HDD is as simple as: $ mkfs.xfs /dev/sda $ mount /dev/sda /mnt When using NVMe ZNS SSDs that do not support conventional zones, the traditional multi-device RT configuration is required. E.g. for an SSD with a conventional namespace 1 and a zoned namespace 2: $ mkfs.xfs /dev/nvme0n1 -r rtdev=/dev/nvme0n2 $ mount -o rtdev=/dev/nvme0n2 /dev/nvme0n1 /mnt The zoned allocator can also be used on conventional block devices, or on conventional zones (e.g. when using an SMR HDD as the external RT device). For example, using zoned XFS on normal SSDs shows very nice performance advantages and write amplification reduction for intelligent workloads like RocksDB.
Some work is still in progress or planned, but should not affect the integration with the rest of XFS or the on-disk format: - support for quotas - support for reflinks Note that the I/O path already supports reflink, but garbage collection isn't refcount aware yet and would unshare shared blocks, thus rendering the feature useless.
Diffstat (limited to 'fs/xfs/xfs_bmap_util.h')
-rw-r--r--fs/xfs/xfs_bmap_util.h12
1 files changed, 7 insertions, 5 deletions
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index b29760d36e1a..c477b3361630 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -15,6 +15,7 @@ struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
struct xfs_bmalloca;
+struct xfs_zone_alloc_ctx;
#ifdef CONFIG_XFS_RT
int xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
@@ -31,7 +32,8 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
#endif /* CONFIG_XFS_RT */
void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
- xfs_off_t start_byte, xfs_off_t end_byte);
+ xfs_off_t start_byte, xfs_off_t end_byte,
+ struct xfs_zone_alloc_ctx *ac);
struct kgetbmap {
__s64 bmv_offset; /* file offset of segment in blocks */
@@ -54,13 +56,13 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
/* preallocation and hole punch interface */
int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
+ xfs_off_t len);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
+ xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
- xfs_off_t len);
+ xfs_off_t len, struct xfs_zone_alloc_ctx *ac);
int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
- xfs_off_t len);
+ xfs_off_t len);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip);