summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Carlos Maiolino <cem@kernel.org> 2026-03-18 13:04:17 +0300
committer: Carlos Maiolino <cem@kernel.org> 2026-03-18 13:04:17 +0300
commit: 2c0ff6151c7bc51b20e7b25be2073e6f01f750ef (patch)
tree: 5bc9da6135a1b0d3fe72ccc74e5b51ccb890fa96
parent: e5966096d0856d071269cb5928d6bc33342d2dfd (diff)
parent: c1f955437440f92632e2efca4b591371bb3caefc (diff)
download: linux-2c0ff6151c7bc51b20e7b25be2073e6f01f750ef.tar.xz
Merge branch 'xfs-7.1-merge' into for-next
Signed-off-by: Carlos Maiolino <cem@kernel.org>
-rw-r--r-- fs/xfs/libxfs/xfs_fs.h 5
-rw-r--r-- fs/xfs/xfs_file.c 95
-rw-r--r-- fs/xfs/xfs_ioctl.c 19
-rw-r--r-- fs/xfs/xfs_zone_alloc.c 44
-rw-r--r-- fs/xfs/xfs_zone_gc.c 24
-rw-r--r-- fs/xfs/xfs_zone_info.c 7
6 files changed, 137 insertions, 57 deletions
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index d165de607d17..185f09f327c0 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -995,7 +995,8 @@ struct xfs_rtgroup_geometry {
__u32 rg_sick; /* o: sick things in ag */
__u32 rg_checked; /* o: checked metadata in ag */
__u32 rg_flags; /* i/o: flags for this ag */
- __u32 rg_reserved[27]; /* o: zero */
+ __u32 rg_writepointer; /* o: write pointer block offset for zoned */
+ __u32 rg_reserved[26]; /* o: zero */
};
#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
@@ -1003,6 +1004,8 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */
+#define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */
+
/* Health monitor event domains */
/* affects the whole fs */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6246f34df9fd..7918968e1d62 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve(
flags, ac);
}
+/*
+ * We need to lock the test/set EOF update as we can be racing with
+ * other IO completions here to update the EOF. Failing to serialise
+ * here can result in EOF moving backwards and Bad Things Happen when
+ * that occurs.
+ *
+ * As IO completion only ever extends EOF, we can do an unlocked check
+ * here to avoid taking the spinlock. If we land within the current EOF,
+ * then we do not need to do an extending update at all, and we don't
+ * need to take the lock to check this. If we race with an update moving
+ * EOF, then we'll either still be beyond EOF and need to take the lock,
+ * or we'll be within EOF and we don't need to take it at all.
+ */
+static int
+xfs_dio_endio_set_isize(
+ struct inode *inode,
+ loff_t offset,
+ ssize_t size)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ if (offset + size <= i_size_read(inode))
+ return 0;
+
+ spin_lock(&ip->i_flags_lock);
+ if (offset + size <= i_size_read(inode)) {
+ spin_unlock(&ip->i_flags_lock);
+ return 0;
+ }
+
+ i_size_write(inode, offset + size);
+ spin_unlock(&ip->i_flags_lock);
+
+ return xfs_setfilesize(ip, offset, size);
+}
+
+static int
+xfs_zoned_dio_write_end_io(
+ struct kiocb *iocb,
+ ssize_t size,
+ int error,
+ unsigned flags)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ unsigned int nofs_flag;
+
+ ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
+
+ trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size);
+
+ if (xfs_is_shutdown(ip->i_mount))
+ return -EIO;
+
+ if (error || !size)
+ return error;
+
+ XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
+
+ nofs_flag = memalloc_nofs_save();
+ error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size);
+ memalloc_nofs_restore(nofs_flag);
+
+ return error;
+}
+
static int
xfs_dio_write_end_io(
struct kiocb *iocb,
@@ -572,8 +638,7 @@ xfs_dio_write_end_io(
loff_t offset = iocb->ki_pos;
unsigned int nofs_flag;
- ASSERT(!xfs_is_zoned_inode(ip) ||
- !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
+ ASSERT(!xfs_is_zoned_inode(ip));
trace_xfs_end_io_direct_write(ip, offset, size);
@@ -623,30 +688,8 @@ xfs_dio_write_end_io(
* with the on-disk inode size being outside the in-core inode size. We
* have no other method of updating EOF for AIO, so always do it here
* if necessary.
- *
- * We need to lock the test/set EOF update as we can be racing with
- * other IO completions here to update the EOF. Failing to serialise
- * here can result in EOF moving backwards and Bad Things Happen when
- * that occurs.
- *
- * As IO completion only ever extends EOF, we can do an unlocked check
- * here to avoid taking the spinlock. If we land within the current EOF,
- * then we do not need to do an extending update at all, and we don't
- * need to take the lock to check this. If we race with an update moving
- * EOF, then we'll either still be beyond EOF and need to take the lock,
- * or we'll be within EOF and we don't need to take it at all.
*/
- if (offset + size <= i_size_read(inode))
- goto out;
-
- spin_lock(&ip->i_flags_lock);
- if (offset + size > i_size_read(inode)) {
- i_size_write(inode, offset + size);
- spin_unlock(&ip->i_flags_lock);
- error = xfs_setfilesize(ip, offset, size);
- } else {
- spin_unlock(&ip->i_flags_lock);
- }
+ error = xfs_dio_endio_set_isize(inode, offset, size);
out:
memalloc_nofs_restore(nofs_flag);
@@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io(
static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
.bio_set = &iomap_ioend_bioset,
.submit_io = xfs_dio_zoned_submit_io,
- .end_io = xfs_dio_write_end_io,
+ .end_io = xfs_zoned_dio_write_end_io,
};
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index facffdc8dca8..46e234863644 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -37,12 +37,15 @@
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtrmap_btree.h"
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_handle.h"
#include "xfs_rtgroup.h"
#include "xfs_healthmon.h"
#include "xfs_verify_media.h"
+#include "xfs_zone_priv.h"
+#include "xfs_zone_alloc.h"
#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry(
{
struct xfs_rtgroup *rtg;
struct xfs_rtgroup_geometry rgeo;
+ xfs_rgblock_t highest_rgbno;
int error;
if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
@@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry(
if (error)
return error;
+ if (xfs_has_zoned(mp)) {
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ if (rtg->rtg_open_zone) {
+ rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated;
+ } else {
+ highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
+ if (highest_rgbno == NULLRGBLOCK)
+ rgeo.rg_writepointer = 0;
+ else
+ rgeo.rg_writepointer = highest_rgbno + 1;
+ }
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
+ }
+
if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
return -EFAULT;
return 0;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index e3d19b6dc64a..06e2cb79030e 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -189,27 +189,16 @@ xfs_open_zone_mark_full(
xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);
}
-static void
-xfs_zone_record_blocks(
- struct xfs_trans *tp,
+static inline void
+xfs_zone_inc_written(
struct xfs_open_zone *oz,
- xfs_fsblock_t fsbno,
xfs_filblks_t len)
{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_rtgroup *rtg = oz->oz_rtg;
- struct xfs_inode *rmapip = rtg_rmap(rtg);
-
- trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len);
+ xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL);
- xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
- xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
- rmapip->i_used_blocks += len;
- ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
oz->oz_written += len;
- if (oz->oz_written == rtg_blocks(rtg))
+ if (oz->oz_written == rtg_blocks(oz->oz_rtg))
xfs_open_zone_mark_full(oz);
- xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
}
/*
@@ -227,9 +216,7 @@ xfs_zone_skip_blocks(
trace_xfs_zone_skip_blocks(oz, 0, len);
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
- oz->oz_written += len;
- if (oz->oz_written == rtg_blocks(rtg))
- xfs_open_zone_mark_full(oz);
+ xfs_zone_inc_written(oz, len);
xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
xfs_add_frextents(rtg_mount(rtg), len);
@@ -244,6 +231,8 @@ xfs_zoned_map_extent(
xfs_fsblock_t old_startblock)
{
struct xfs_bmbt_irec data;
+ struct xfs_rtgroup *rtg = oz->oz_rtg;
+ struct xfs_inode *rmapip = rtg_rmap(rtg);
int nmaps = 1;
int error;
@@ -302,7 +291,15 @@ xfs_zoned_map_extent(
}
}
- xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);
+ trace_xfs_zone_record_blocks(oz,
+ xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock),
+ new->br_blockcount);
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
+ rmapip->i_used_blocks += new->br_blockcount;
+ ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
+ xfs_zone_inc_written(oz, new->br_blockcount);
+ xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
/* Map the new blocks into the data fork. */
xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);
@@ -681,10 +678,11 @@ xfs_select_zone_nowait(
if (oz)
goto out_unlock;
- if (pack_tight)
+ if (pack_tight) {
oz = xfs_select_open_zone_mru(zi, write_hint);
- if (oz)
- goto out_unlock;
+ if (oz)
+ goto out_unlock;
+ }
/*
* See if we can open a new zone and use that so that data for different
@@ -695,7 +693,7 @@ xfs_select_zone_nowait(
goto out_unlock;
/*
- * Try to find an zone that is an ok match to colocate data with.
+ * Try to find a zone that is an ok match to colocate data with.
*/
oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
if (oz)
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 309f70098524..0ff710fa0ee7 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -170,25 +170,37 @@ xfs_zoned_need_gc(
s64 available, free, threshold;
s32 remainder;
+ /* If we have no reclaimable blocks, running GC is useless. */
if (!xfs_zoned_have_reclaimable(mp->m_zone_info))
return false;
+ /*
+ * In order to avoid file fragmentation as much as possible, we should
+ * make sure that we can open enough zones. So trigger GC if the number
+ * of blocks immediately available for writes is lower than the total
+ * number of blocks from all possible open zones.
+ */
available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);
-
if (available <
xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
return true;
- free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
+ /*
+ * For cases where the user wants to be more aggressive with GC,
+ * the sysfs attribute zonegc_low_space may be set to a non zero value,
+ * to indicate that GC should try to maintain at least zonegc_low_space
+ * percent of the free space to be directly available for writing. Check
+ * this here.
+ */
+ if (!mp->m_zonegc_low_space)
+ return false;
+ free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
threshold = div_s64_rem(free, 100, &remainder);
threshold = threshold * mp->m_zonegc_low_space +
remainder * div_s64(mp->m_zonegc_low_space, 100);
- if (available < threshold)
- return true;
-
- return false;
+ return available < threshold;
}
static struct xfs_zone_gc_data *
diff --git a/fs/xfs/xfs_zone_info.c b/fs/xfs/xfs_zone_info.c
index 53eabbc3334c..a2af44011654 100644
--- a/fs/xfs/xfs_zone_info.c
+++ b/fs/xfs/xfs_zone_info.c
@@ -90,9 +90,14 @@ xfs_zoned_show_stats(
seq_printf(m, "\tRT GC required: %d\n",
xfs_zoned_need_gc(mp));
+ seq_printf(m, "\ttotal number of zones: %u\n",
+ mp->m_sb.sb_rgcount);
seq_printf(m, "\tfree zones: %d\n", atomic_read(&zi->zi_nr_free_zones));
- seq_puts(m, "\topen zones:\n");
+
spin_lock(&zi->zi_open_zones_lock);
+ seq_printf(m, "\tnumber of open zones: %u / %u\n",
+ zi->zi_nr_open_zones, mp->m_max_open_zones);
+ seq_puts(m, "\topen zones:\n");
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
xfs_show_open_zone(m, oz);
if (zi->zi_open_gc_zone) {