From 05290bd5c6236b8ad659157edb36bd2d38f46d3e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:19 -0800 Subject: xfs: allow inode-based btrees to reserve space in the data device Create a new space reservation scheme so that btree metadata for the realtime volume can reserve space in the data device to avoid space underruns. Back when we were testing the rmap and refcount btrees for the data device, people observed occasional shutdowns when xfs_btree_split was called for either of those two btrees. This happened when certain operations (mostly writeback ioends) created new rmap or refcount records, which would expand the size of the btree. If there were no free blocks available the allocation would fail and the split would shut down the filesystem. I considered pre-reserving blocks for btree expansion at the time of a write() call, but there wasn't any good way to attach the reservations to an inode and keep them there all the way to ioend processing. Unlike delalloc reservations which have that indlen mechanism, there's no way to do that for mapped extents; and indlen blocks are given back during the delalloc -> unwritten transition. The solution was to reserve sufficient blocks for rmap/refcount btree expansion at mount time. This is what the XFS_AG_RESV_* flags provide; any expansion of those two btrees can come from the pre-reserved space. This patch brings that pre-reservation ability to inode-rooted btrees so that the rt rmap and refcount btrees can also save room for future expansion. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_rtalloc.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index fcfa6e0eb3ad..5128c5ad72f5 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1344,6 +1344,12 @@ xfs_growfs_rt( if (!error) error = error2; + + /* Reset the rt metadata btree space reservations. */ + xfs_rt_resv_free(mp); + error2 = xfs_rt_resv_init(mp); + if (error2 && error2 != -ENOSPC) + error = error2; } out_unlock: @@ -1487,6 +1493,21 @@ xfs_rtalloc_reinit_frextents( return 0; } +/* Free space reservations for rt metadata inodes. */ +void +xfs_rt_resv_free( + struct xfs_mount *mp) +{ +} + +/* Reserve space for rt metadata inodes' space expansion. */ +int +xfs_rt_resv_init( + struct xfs_mount *mp) +{ + return 0; +} + /* * Read in the bmbt of an rt metadata inode so that we never have to load them * at runtime. This enables the use of shared ILOCKs for rtbitmap scans. Use -- cgit v1.2.3 From af32541081ed6b6ad49b1ea38b5128cb319841b0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 22 Nov 2024 12:33:17 -0800 Subject: xfs: add some rtgroup inode helpers Create some simple helpers to reduce the amount of typing whenever we access rtgroup inodes. Conversion was done with this spatch and some minor reformatting: @@ expression rtg; @@ - rtg->rtg_inodes[XFS_RTGI_BITMAP] + rtg_bitmap(rtg) @@ expression rtg; @@ - rtg->rtg_inodes[XFS_RTGI_SUMMARY] + rtg_summary(rtg) and the CLI command: $ spatch --sp-file /tmp/moo.cocci --dir fs/xfs/ --use-gitgrep --in-place Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rtbitmap.c | 2 +- fs/xfs/libxfs/xfs_rtgroup.c | 18 ++++++++---------- fs/xfs/libxfs/xfs_rtgroup.h | 10 ++++++++++ fs/xfs/scrub/rtbitmap.c | 7 +++---- fs/xfs/scrub/rtsummary.c | 12 +++++------- fs/xfs/xfs_qm.c | 8 ++++---- fs/xfs/xfs_rtalloc.c | 10 +++++----- 7 files changed, 36 insertions(+), 31 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 4ddfb7e395b3..770adf60dd73 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1055,7 +1055,7 @@ xfs_rtfree_extent( xfs_rtxlen_t len) /* length of extent freed */ { struct xfs_mount *mp = tp->t_mountp; - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); struct xfs_rtalloc_args args = { .mp = mp, .tp = tp, diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c index 4f3bfc884aff..a79b734e7044 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.c +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -197,10 +197,10 @@ xfs_rtgroup_lock( * Lock both realtime free space metadata inodes for a freespace * update. */ - xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); - xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); + xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_EXCL); + xfs_ilock(rtg_summary(rtg), XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { - xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); + xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_SHARED); } } @@ -215,10 +215,10 @@ xfs_rtgroup_unlock( !(rtglock_flags & XFS_RTGLOCK_BITMAP)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { - xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL); - xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL); + xfs_iunlock(rtg_summary(rtg), XFS_ILOCK_EXCL); + xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_EXCL); } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) { - xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED); + xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_SHARED); } } @@ -236,10 +236,8 @@ xfs_rtgroup_trans_join( ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED)); if (rtglock_flags & XFS_RTGLOCK_BITMAP) { - xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP], - XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY], - XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, rtg_bitmap(rtg), XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, rtg_summary(rtg), XFS_ILOCK_EXCL); } } diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index 7e7e491ff06f..19f8d302b9aa 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -64,6 +64,16 @@ static inline xfs_rgnumber_t rtg_rgno(const struct xfs_rtgroup *rtg) return rtg->rtg_group.xg_gno; } +static inline struct xfs_inode *rtg_bitmap(const struct xfs_rtgroup *rtg) +{ + return rtg->rtg_inodes[XFS_RTGI_BITMAP]; +} + +static inline struct xfs_inode *rtg_summary(const struct xfs_rtgroup *rtg) +{ + return rtg->rtg_inodes[XFS_RTGI_SUMMARY]; +} + /* Passive rtgroup references */ static inline struct xfs_rtgroup * xfs_rtgroup_get( diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 376a36fd9a9c..fb4970c877ab 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -49,8 +49,7 @@ xchk_setup_rtbitmap( if (error) return error; - error = xchk_install_live_inode(sc, - sc->sr.rtg->rtg_inodes[XFS_RTGI_BITMAP]); + error = xchk_install_live_inode(sc, rtg_bitmap(sc->sr.rtg)); if (error) return error; @@ -146,7 +145,7 @@ xchk_rtbitmap( { struct xfs_mount *mp = sc->mp; struct xfs_rtgroup *rtg = sc->sr.rtg; - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); struct xchk_rtbitmap *rtb = sc->buf; int error; @@ -215,7 +214,7 @@ xchk_xref_is_used_rt_space( xfs_extlen_t len) { struct xfs_rtgroup *rtg = sc->sr.rtg; - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); xfs_rtxnum_t startext; xfs_rtxnum_t endext; bool is_free; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 49fc6250bafc..f1af5431b388 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -81,8 +81,7 @@ xchk_setup_rtsummary( if (error) return error; - error = xchk_install_live_inode(sc, - sc->sr.rtg->rtg_inodes[XFS_RTGI_SUMMARY]); + error = xchk_install_live_inode(sc, rtg_summary(sc->sr.rtg)); if (error) return error; @@ -191,8 +190,7 @@ xchk_rtsum_record_free( rtlen = xfs_rtxlen_to_extlen(mp, rec->ar_extcount); if (!xfs_verify_rtbext(mp, rtbno, rtlen)) { - xchk_ino_xref_set_corrupt(sc, - rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_ino); + xchk_ino_xref_set_corrupt(sc, rtg_bitmap(rtg)->i_ino); return -EFSCORRUPTED; } @@ -218,7 +216,7 @@ xchk_rtsum_compute( /* If the bitmap size doesn't match the computed size, bail. */ if (XFS_FSB_TO_B(mp, xfs_rtbitmap_blockcount(mp)) != - rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_disk_size) + rtg_bitmap(rtg)->i_disk_size) return -EFSCORRUPTED; return xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtsum_record_free, sc); @@ -310,8 +308,8 @@ xchk_rtsummary( { struct xfs_mount *mp = sc->mp; struct xfs_rtgroup *rtg = sc->sr.rtg; - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; - struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); + struct xfs_inode *rsumip = rtg_summary(rtg); struct xchk_rtsummary *rts = sc->buf; int error; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 3abab5fb593e..e1ba5af6250f 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -230,10 +230,10 @@ xfs_qm_unmount_rt( if (!rtg) return; - if (rtg->rtg_inodes[XFS_RTGI_BITMAP]) - xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_BITMAP]); - if (rtg->rtg_inodes[XFS_RTGI_SUMMARY]) - xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_SUMMARY]); + if (rtg_bitmap(rtg)) + xfs_qm_dqdetach(rtg_bitmap(rtg)); + if (rtg_summary(rtg)) + xfs_qm_dqdetach(rtg_summary(rtg)); xfs_rtgroup_rele(rtg); } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 5128c5ad72f5..4cd2f32aa70a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -856,8 +856,8 @@ xfs_growfs_rt_bmblock( xfs_fileoff_t bmbno) { struct xfs_mount *mp = rtg_mount(rtg); - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; - struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); + struct xfs_inode *rsumip = rtg_summary(rtg); struct xfs_rtalloc_args args = { .mp = mp, .rtg = rtg, @@ -1041,8 +1041,8 @@ xfs_growfs_rt_alloc_blocks( xfs_extlen_t *nrbmblocks) { struct xfs_mount *mp = rtg_mount(rtg); - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; - struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); + struct xfs_inode *rsumip = rtg_summary(rtg); xfs_extlen_t orbmblocks = 0; xfs_extlen_t orsumblocks = 0; struct xfs_mount *nmp; @@ -1622,7 +1622,7 @@ xfs_rtpick_extent( xfs_rtxlen_t len) /* allocation length (rtextents) */ { struct xfs_mount *mp = rtg_mount(rtg); - struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP]; + struct xfs_inode *rbmip = rtg_bitmap(rtg); xfs_rtxnum_t b = 0; /* result rtext */ int log2; /* log of sequence number */ uint64_t resid; /* residual after log removed */ -- cgit v1.2.3 From 8491a55cfc73ff5c2c637a70ade51d4d08abb90a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:28 -0800 Subject: xfs: add metadata reservations for realtime rmap btrees Reserve some free blocks so that we will always have enough free blocks in the data volume to handle expansion of the realtime rmap btree. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rtrmap_btree.c | 41 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rtrmap_btree.h | 2 ++ fs/xfs/xfs_rtalloc.c | 23 +++++++++++++++++++++- 3 files changed, 65 insertions(+), 1 deletion(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index 22aabf326b2c..066deadcaac9 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -540,3 +540,44 @@ xfs_rtrmapbt_compute_maxlevels( /* Add one level to handle the inode root level. */ mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1; } + +/* Calculate the rtrmap btree size for some records. */ +static unsigned long long +xfs_rtrmapbt_calc_size( + struct xfs_mount *mp, + unsigned long long len) +{ + return xfs_btree_calc_size(mp->m_rtrmap_mnr, len); +} + +/* + * Calculate the maximum rmap btree size. + */ +static unsigned long long +xfs_rtrmapbt_max_size( + struct xfs_mount *mp, + xfs_rtblock_t rtblocks) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_rtrmap_mxr[0] == 0) + return 0; + + return xfs_rtrmapbt_calc_size(mp, rtblocks); +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +xfs_filblks_t +xfs_rtrmapbt_calc_reserves( + struct xfs_mount *mp) +{ + uint32_t blocks = mp->m_groups[XG_TYPE_RTG].blocks; + + if (!xfs_has_rtrmapbt(mp)) + return 0; + + /* Reserve 1% of the rtgroup or enough for 1 block per record. */ + return max_t(xfs_filblks_t, blocks / 100, + xfs_rtrmapbt_max_size(mp, blocks)); +} diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.h b/fs/xfs/libxfs/xfs_rtrmap_btree.h index 7d1a3a49a2d6..eaa2942297e2 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.h +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.h @@ -79,4 +79,6 @@ unsigned int xfs_rtrmapbt_maxlevels_ondisk(void); int __init xfs_rtrmapbt_init_cur_cache(void); void xfs_rtrmapbt_destroy_cur_cache(void); +xfs_filblks_t xfs_rtrmapbt_calc_reserves(struct xfs_mount *mp); + #endif /* __XFS_RTRMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 4cd2f32aa70a..2245f9ecaa33 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -22,6 +22,7 @@ #include "xfs_rtalloc.h" #include "xfs_sb.h" #include "xfs_rtbitmap.h" +#include "xfs_rtrmap_btree.h" #include "xfs_quota.h" #include "xfs_log_priv.h" #include "xfs_health.h" @@ -1498,6 +1499,13 @@ void xfs_rt_resv_free( struct xfs_mount *mp) { + struct xfs_rtgroup *rtg = NULL; + unsigned int i; + + while ((rtg = xfs_rtgroup_next(mp, rtg))) { + for (i = 0; i < XFS_RTGI_MAX; i++) + xfs_metafile_resv_free(rtg->rtg_inodes[i]); + } } /* Reserve space for rt metadata inodes' space expansion. */ @@ -1505,7 +1513,20 @@ int xfs_rt_resv_init( struct xfs_mount *mp) { - return 0; + struct xfs_rtgroup *rtg = NULL; + xfs_filblks_t ask; + int error = 0; + + while ((rtg = xfs_rtgroup_next(mp, rtg))) { + int err2; + + ask = xfs_rtrmapbt_calc_reserves(mp); + err2 = xfs_metafile_resv_init(rtg_rmap(rtg), ask); + if (err2 && !error) + error = err2; + } + + return error; } /* -- cgit v1.2.3 From 71b8acb42be60e11810eb43a6f470589fcf7b7dd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:30 -0800 Subject: xfs: create routine to allocate and initialize a realtime rmap btree inode Create a library routine to allocate and initialize an empty realtime rmapbt inode. We'll use this for mkfs and repair. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rtgroup.c | 2 ++ fs/xfs/libxfs/xfs_rtrmap_btree.c | 54 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rtrmap_btree.h | 5 ++++ fs/xfs/xfs_rtalloc.c | 12 +++++++-- 4 files changed, 71 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c index af1716ec0691..5f31b6e65d5d 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.c +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -33,6 +33,7 @@ #include "xfs_rtbitmap.h" #include "xfs_metafile.h" #include "xfs_metadir.h" +#include "xfs_rtrmap_btree.h" /* Find the first usable fsblock in this rtgroup. */ static inline uint32_t @@ -363,6 +364,7 @@ static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = { * rtrmapbt predicate here. */ .enabled = xfs_has_rmapbt, + .create = xfs_rtrmapbt_create, }, }; diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index af0df0c7e61e..4de7720d2f4e 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -832,3 +832,57 @@ xfs_iflush_rtrmap( xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK)); } + +/* + * Create a realtime rmap btree inode. + */ +int +xfs_rtrmapbt_create( + struct xfs_rtgroup *rtg, + struct xfs_inode *ip, + struct xfs_trans *tp, + bool init) +{ + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + struct xfs_mount *mp = ip->i_mount; + struct xfs_btree_block *broot; + + ifp->if_format = XFS_DINODE_FMT_META_BTREE; + ASSERT(ifp->if_broot_bytes == 0); + ASSERT(ifp->if_bytes == 0); + + /* Initialize the empty incore btree root. */ + broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0)); + if (broot) + xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0, + ip->i_ino); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT); + + return 0; +} + +/* + * Initialize an rmap for a realtime superblock using the potentially updated + * rt geometry in the provided @mp. + */ +int +xfs_rtrmapbt_init_rtsb( + struct xfs_mount *mp, + struct xfs_rtgroup *rtg, + struct xfs_trans *tp) +{ + struct xfs_rmap_irec rmap = { + .rm_blockcount = mp->m_sb.sb_rextsize, + .rm_owner = XFS_RMAP_OWN_FS, + }; + struct xfs_btree_cur *cur; + int error; + + ASSERT(xfs_has_rtsb(mp)); + ASSERT(rtg_rgno(rtg) == 0); + + cur = xfs_rtrmapbt_init_cursor(tp, rtg); + error = xfs_rmap_map_raw(cur, &rmap); + xfs_btree_del_cursor(cur, error); + return error; +} diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.h b/fs/xfs/libxfs/xfs_rtrmap_btree.h index e97695066920..bf73460be274 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.h +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.h @@ -193,4 +193,9 @@ void xfs_rtrmapbt_to_disk(struct xfs_mount *mp, struct xfs_btree_block *rblock, unsigned int dblocklen); void xfs_iflush_rtrmap(struct xfs_inode *ip, struct xfs_dinode *dip); +int xfs_rtrmapbt_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip, + struct xfs_trans *tp, bool init); +int xfs_rtrmapbt_init_rtsb(struct xfs_mount *mp, struct xfs_rtgroup *rtg, + struct xfs_trans *tp); + #endif /* __XFS_RTRMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2245f9ecaa33..c7efd9264139 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -846,6 +846,13 @@ xfs_growfs_rt_init_rtsb( mp->m_rtsb_bp = rtsb_bp; error = xfs_bwrite(rtsb_bp); xfs_buf_unlock(rtsb_bp); + if (error) + return error; + + /* Initialize the rtrmap to reflect the rtsb. */ + if (rtg_rmap(args->rtg) != NULL) + error = xfs_rtrmapbt_init_rtsb(nargs->mp, args->rtg, args->tp); + return error; } @@ -894,8 +901,9 @@ xfs_growfs_rt_bmblock( goto out_free; nargs.tp = args.tp; - xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP); - xfs_rtgroup_trans_join(args.tp, args.rtg, XFS_RTGLOCK_BITMAP); + xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP | XFS_RTGLOCK_RMAP); + xfs_rtgroup_trans_join(args.tp, args.rtg, + XFS_RTGLOCK_BITMAP | XFS_RTGLOCK_RMAP); /* * Update the bitmap inode's size ondisk and incore. We need to update -- cgit v1.2.3 From 59a57acbce282d5ff0ddcc308b934549b340c713 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:32 -0800 Subject: xfs: check that the rtrmapbt maxlevels doesn't increase when growing fs The size of filesystem transaction reservations depends on the maximum height (maxlevels) of the realtime btrees. Since we don't want a grow operation to increase the reservation size enough that we'll fail the minimum log size checks on the next mount, constrain growfs operations if they would cause an increase in those maxlevels. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_fsops.c | 11 +++++++++++ fs/xfs/xfs_rtalloc.c | 25 ++++++++++++++++++------- fs/xfs/xfs_rtalloc.h | 10 ++++++++++ fs/xfs/xfs_trace.h | 21 +++++++++++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index e1145107d8cb..9df5a09c0acd 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -22,6 +22,7 @@ #include "xfs_ag_resv.h" #include "xfs_trace.h" #include "xfs_rtalloc.h" +#include "xfs_rtrmap_btree.h" /* * Write new AG headers to disk. Non-transactional, but need to be @@ -114,6 +115,12 @@ xfs_growfs_data_private( xfs_buf_relse(bp); } + /* Make sure the new fs size won't cause problems with the log. */ + error = xfs_growfs_check_rtgeom(mp, nb, mp->m_sb.sb_rblocks, + mp->m_sb.sb_rextsize); + if (error) + return error; + nb_div = nb; nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS) @@ -221,7 +228,11 @@ xfs_growfs_data_private( error = xfs_fs_reserve_ag_blocks(mp); if (error == -ENOSPC) error = 0; + + /* Compute new maxlevels for rt btrees. */ + xfs_rtrmapbt_compute_maxlevels(mp); } + return error; out_trans_cancel: diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index c7efd9264139..3c1bce5a4855 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -989,9 +989,11 @@ xfs_growfs_rt_bmblock( goto out_free; /* - * Ensure the mount RT feature flag is now set. + * Ensure the mount RT feature flag is now set, and compute new + * maxlevels for rt btrees. */ mp->m_features |= XFS_FEAT_REALTIME; + xfs_rtrmapbt_compute_maxlevels(mp); kfree(nmp); return 0; @@ -1159,29 +1161,37 @@ out_rele: return error; } -static int +int xfs_growfs_check_rtgeom( const struct xfs_mount *mp, + xfs_rfsblock_t dblocks, xfs_rfsblock_t rblocks, xfs_extlen_t rextsize) { + xfs_extlen_t min_logfsbs; struct xfs_mount *nmp; - int error = 0; nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize); if (!nmp) return -ENOMEM; + nmp->m_sb.sb_dblocks = dblocks; + + xfs_rtrmapbt_compute_maxlevels(nmp); + xfs_trans_resv_calc(nmp, M_RES(nmp)); /* * New summary size can't be more than half the size of the log. This * prevents us from getting a log overflow, since we'll log basically * the whole summary file at once. */ - if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1)) - error = -EINVAL; + min_logfsbs = min_t(xfs_extlen_t, xfs_log_calc_minimum_size(nmp), + nmp->m_rsumblocks * 2); kfree(nmp); - return error; + + if (min_logfsbs > mp->m_sb.sb_logblocks) + return -EINVAL; + return 0; } /* @@ -1300,7 +1310,8 @@ xfs_growfs_rt( goto out_unlock; /* Make sure the new fs size won't cause problems with the log. */ - error = xfs_growfs_check_rtgeom(mp, in->newblocks, in->extsize); + error = xfs_growfs_check_rtgeom(mp, mp->m_sb.sb_dblocks, in->newblocks, + in->extsize); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index d87523e6a550..9044f7226ab6 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -46,6 +46,8 @@ xfs_growfs_rt( xfs_growfs_rt_t *in); /* user supplied growfs struct */ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); +int xfs_growfs_check_rtgeom(const struct xfs_mount *mp, xfs_rfsblock_t dblocks, + xfs_rfsblock_t rblocks, xfs_agblock_t rextsize); #else # define xfs_growfs_rt(mp,in) (-ENOSYS) # define xfs_rtalloc_reinit_frextents(m) (0) @@ -65,6 +67,14 @@ xfs_rtmount_init( # define xfs_rtunmount_inodes(m) # define xfs_rt_resv_free(mp) ((void)0) # define xfs_rt_resv_init(mp) (0) + +static inline int +xfs_growfs_check_rtgeom(const struct xfs_mount *mp, + xfs_rfsblock_t dblocks, xfs_rfsblock_t rblocks, + xfs_extlen_t rextsize) +{ + return 0; +} #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTALLOC_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a098935163b7..84cdc145e2d9 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -5622,6 +5622,27 @@ DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_free_space); DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_critical); DEFINE_INODE_ERROR_EVENT(xfs_metafile_resv_init_error); +#ifdef CONFIG_XFS_RT +TRACE_EVENT(xfs_growfs_check_rtgeom, + TP_PROTO(const struct xfs_mount *mp, unsigned int min_logfsbs), + TP_ARGS(mp, min_logfsbs), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, logblocks) + __field(unsigned int, min_logfsbs) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->logblocks = mp->m_sb.sb_logblocks; + __entry->min_logfsbs = min_logfsbs; + ), + TP_printk("dev %d:%d logblocks %u min_logfsbs %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->logblocks, + __entry->min_logfsbs) +); +#endif /* CONFIG_XFS_RT */ + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From c2358439af374cad47f771797875d0beb8256738 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:45 -0800 Subject: xfs: enable realtime rmap btree Permit mounting filesystems with realtime rmap btrees. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_rtalloc.c | 12 ++++++++---- fs/xfs/xfs_super.c | 6 ------ 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 3c1bce5a4855..a69967f9d88e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1282,11 +1282,15 @@ xfs_growfs_rt( XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) goto out_unlock; - /* Unsupported realtime features. */ + /* Check for features supported only on rtgroups filesystems. */ error = -EOPNOTSUPP; - if (xfs_has_quota(mp) && !xfs_has_rtgroups(mp)) - goto out_unlock; - if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) + if (!xfs_has_rtgroups(mp)) { + if (xfs_has_rmapbt(mp)) + goto out_unlock; + if (xfs_has_quota(mp)) + goto out_unlock; + } + if (xfs_has_reflink(mp)) goto out_unlock; error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 394fdf3bb535..ecd5a9f444d8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1767,12 +1767,6 @@ xfs_fs_fill_super( } } - if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) { - xfs_alert(mp, - "reverse mapping btree not compatible with realtime device!"); - error = -EINVAL; - goto out_filestream_unmount; - } if (xfs_has_exchange_range(mp)) xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE); -- cgit v1.2.3 From bf0b99411335db18a9ed4fcef278ce9e313f6076 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:53 -0800 Subject: xfs: add metadata reservations for realtime refcount btree Reserve some free blocks so that we will always have enough free blocks in the data volume to handle expansion of the realtime refcount btree. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rtrefcount_btree.c | 38 ++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rtrefcount_btree.h | 4 ++++ fs/xfs/xfs_rtalloc.c | 6 ++++++ 3 files changed, 48 insertions(+) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.c b/fs/xfs/libxfs/xfs_rtrefcount_btree.c index ebbeab112d14..ff72ed09e75f 100644 --- a/fs/xfs/libxfs/xfs_rtrefcount_btree.c +++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.c @@ -419,3 +419,41 @@ xfs_rtrefcountbt_compute_maxlevels( /* Add one level to handle the inode root level. */ mp->m_rtrefc_maxlevels = min(d_maxlevels, r_maxlevels) + 1; } + +/* Calculate the rtrefcount btree size for some records. */ +unsigned long long +xfs_rtrefcountbt_calc_size( + struct xfs_mount *mp, + unsigned long long len) +{ + return xfs_btree_calc_size(mp->m_rtrefc_mnr, len); +} + +/* + * Calculate the maximum refcount btree size. + */ +static unsigned long long +xfs_rtrefcountbt_max_size( + struct xfs_mount *mp, + xfs_rtblock_t rtblocks) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_rtrefc_mxr[0] == 0) + return 0; + + return xfs_rtrefcountbt_calc_size(mp, rtblocks); +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + * We need enough space to hold one record for every rt extent in the rtgroup. + */ +xfs_filblks_t +xfs_rtrefcountbt_calc_reserves( + struct xfs_mount *mp) +{ + if (!xfs_has_rtreflink(mp)) + return 0; + + return xfs_rtrefcountbt_max_size(mp, mp->m_sb.sb_rgextents); +} diff --git a/fs/xfs/libxfs/xfs_rtrefcount_btree.h b/fs/xfs/libxfs/xfs_rtrefcount_btree.h index b713b3381880..3cd44590c930 100644 --- a/fs/xfs/libxfs/xfs_rtrefcount_btree.h +++ b/fs/xfs/libxfs/xfs_rtrefcount_btree.h @@ -67,4 +67,8 @@ unsigned int xfs_rtrefcountbt_maxlevels_ondisk(void); int __init xfs_rtrefcountbt_init_cur_cache(void); void xfs_rtrefcountbt_destroy_cur_cache(void); +xfs_filblks_t xfs_rtrefcountbt_calc_reserves(struct xfs_mount *mp); +unsigned long long xfs_rtrefcountbt_calc_size(struct xfs_mount *mp, + unsigned long long len); + #endif /* __XFS_RTREFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a69967f9d88e..294aa0739be3 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -31,6 +31,7 @@ #include "xfs_rtgroup.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_rtrefcount_btree.h" /* * Return whether there are any free extents in the size range given @@ -1547,6 +1548,11 @@ xfs_rt_resv_init( err2 = xfs_metafile_resv_init(rtg_rmap(rtg), ask); if (err2 && !error) error = err2; + + ask = xfs_rtrefcountbt_calc_reserves(mp); + err2 = xfs_metafile_resv_init(rtg_refcount(rtg), ask); + if (err2 && !error) + error = err2; } return error; -- cgit v1.2.3 From 8e84e8052bc283ebb37f929eb9fb97483ea7385e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:21:03 -0800 Subject: xfs: enable extent size hints for CoW operations Wire up the copy-on-write extent size hint for realtime files, and connect it to the rt allocator so that we avoid fragmentation on rt filesystems. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 8 +++++++- fs/xfs/xfs_rtalloc.c | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ae3d33d60761..40ad22fb808b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6524,7 +6524,13 @@ xfs_get_cowextsz_hint( a = 0; if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) a = ip->i_cowextsize; - b = xfs_get_extsz_hint(ip); + if (XFS_IS_REALTIME_INODE(ip)) { + b = 0; + if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) + b = ip->i_extsize; + } else { + b = xfs_get_extsz_hint(ip); + } a = max(a, b); if (a == 0) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 294aa0739be3..f5a3d5f8c948 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -2021,7 +2021,10 @@ xfs_rtallocate_align( if (*noalign) { align = mp->m_sb.sb_rextsize; } else { - align = xfs_get_extsz_hint(ap->ip); + if (ap->flags & XFS_BMAPI_COWFORK) + align = xfs_get_cowextsz_hint(ap->ip); + else + align = xfs_get_extsz_hint(ap->ip); if (!align) align = 1; if (align == mp->m_sb.sb_rextsize) -- cgit v1.2.3 From 88a70768df138b97b36bf1571fcde92907812a3d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:21:04 -0800 Subject: xfs: check that the rtrefcount maxlevels doesn't increase when growing fs The size of filesystem transaction reservations depends on the maximum height (maxlevels) of the realtime btrees. Since we don't want a grow operation to increase the reservation size enough that we'll fail the minimum log size checks on the next mount, constrain growfs operations if they would cause an increase in the rt refcount btree maxlevels. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_fsops.c | 2 ++ fs/xfs/xfs_rtalloc.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 9df5a09c0acd..455298503d01 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -23,6 +23,7 @@ #include "xfs_trace.h" #include "xfs_rtalloc.h" #include "xfs_rtrmap_btree.h" +#include "xfs_rtrefcount_btree.h" /* * Write new AG headers to disk. Non-transactional, but need to be @@ -231,6 +232,7 @@ xfs_growfs_data_private( /* Compute new maxlevels for rt btrees. */ xfs_rtrmapbt_compute_maxlevels(mp); + xfs_rtrefcountbt_compute_maxlevels(mp); } return error; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index f5a3d5f8c948..a5de5405800a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -995,6 +995,7 @@ xfs_growfs_rt_bmblock( */ mp->m_features |= XFS_FEAT_REALTIME; xfs_rtrmapbt_compute_maxlevels(mp); + xfs_rtrefcountbt_compute_maxlevels(mp); kfree(nmp); return 0; @@ -1178,6 +1179,7 @@ xfs_growfs_check_rtgeom( nmp->m_sb.sb_dblocks = dblocks; xfs_rtrmapbt_compute_maxlevels(nmp); + xfs_rtrefcountbt_compute_maxlevels(nmp); xfs_trans_resv_calc(nmp, M_RES(nmp)); /* -- cgit v1.2.3 From fd97fe1112088c16594dcd45e809f1643e623309 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:21:16 -0800 Subject: xfs: fix CoW forks for realtime files Port the copy on write fork repair to realtime files. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/agheader_repair.c | 2 +- fs/xfs/scrub/cow_repair.c | 178 +++++++++++++++++++++++++++--- fs/xfs/scrub/reap.c | 242 +++++++++++++++++++++++++++++++++++++++-- fs/xfs/scrub/reap.h | 7 ++ fs/xfs/scrub/repair.h | 1 + fs/xfs/scrub/rtb_bitmap.h | 37 +++++++ fs/xfs/scrub/trace.h | 36 +++--- fs/xfs/xfs_rtalloc.c | 4 +- fs/xfs/xfs_rtalloc.h | 5 + 9 files changed, 470 insertions(+), 42 deletions(-) create mode 100644 fs/xfs/scrub/rtb_bitmap.h (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index b45d2b32051a..cd6f0223879f 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -647,7 +647,7 @@ xrep_agfl_fill( xfs_agblock_t agbno = start; int error; - trace_xrep_agfl_insert(sc->sa.pag, agbno, len); + trace_xrep_agfl_insert(pag_group(sc->sa.pag), agbno, len); while (agbno < start + len && af->fl_off < af->flcount) af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++); diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c index ba695dd21f8b..38a246b8bf11 100644 --- a/fs/xfs/scrub/cow_repair.c +++ b/fs/xfs/scrub/cow_repair.c @@ -26,6 +26,9 @@ #include "xfs_errortag.h" #include "xfs_icache.h" #include "xfs_refcount_btree.h" +#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" +#include "xfs_rtgroup.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -34,6 +37,7 @@ #include "scrub/bitmap.h" #include "scrub/off_bitmap.h" #include "scrub/fsb_bitmap.h" +#include "scrub/rtb_bitmap.h" #include "scrub/reap.h" /* @@ -61,7 +65,10 @@ struct xrep_cow { struct xoff_bitmap bad_fileoffs; /* Bitmap of fsblocks that were removed from the CoW fork. */ - struct xfsb_bitmap old_cowfork_fsblocks; + union { + struct xfsb_bitmap old_cowfork_fsblocks; + struct xrtb_bitmap old_cowfork_rtblocks; + }; /* CoW fork mappings used to scan for bad CoW staging extents. */ struct xfs_bmbt_irec irec; @@ -145,8 +152,7 @@ xrep_cow_mark_shared_staging( xrep_cow_trim_refcount(xc, &rrec, rec); return xrep_cow_mark_file_range(xc, - xfs_agbno_to_fsb(to_perag(cur->bc_group), - rrec.rc_startblock), + xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock), rrec.rc_blockcount); } @@ -177,9 +183,8 @@ xrep_cow_mark_missing_staging( if (xc->next_bno >= rrec.rc_startblock) goto next; - error = xrep_cow_mark_file_range(xc, - xfs_agbno_to_fsb(to_perag(cur->bc_group), xc->next_bno), + xfs_gbno_to_fsb(cur->bc_group, xc->next_bno), rrec.rc_startblock - xc->next_bno); if (error) return error; @@ -222,8 +227,7 @@ xrep_cow_mark_missing_staging_rmap( } return xrep_cow_mark_file_range(xc, - xfs_agbno_to_fsb(to_perag(cur->bc_group), rec_bno), - rec_len); + xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len); } /* @@ -310,6 +314,92 @@ out_pag: return 0; } +/* + * Find any part of the CoW fork mapping that isn't a single-owner CoW staging + * extent and mark the corresponding part of the file range in the bitmap. + */ +STATIC int +xrep_cow_find_bad_rt( + struct xrep_cow *xc) +{ + struct xfs_refcount_irec rc_low = { 0 }; + struct xfs_refcount_irec rc_high = { 0 }; + struct xfs_rmap_irec rm_low = { 0 }; + struct xfs_rmap_irec rm_high = { 0 }; + struct xfs_scrub *sc = xc->sc; + struct xfs_rtgroup *rtg; + int error = 0; + + xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock); + + rtg = xfs_rtgroup_get(sc->mp, + xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock)); + if (!rtg) + return -EFSCORRUPTED; + + error = xrep_rtgroup_init(sc, rtg, &sc->sr, + XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT); + if (error) + goto out_rtg; + + /* Mark any CoW fork extents that are shared. */ + rc_low.rc_startblock = xc->irec_startbno; + rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; + rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED; + error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high, + xrep_cow_mark_shared_staging, xc); + if (error) + goto out_sr; + + /* Make sure there are CoW staging extents for the whole mapping. */ + rc_low.rc_startblock = xc->irec_startbno; + rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; + rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW; + xc->next_bno = xc->irec_startbno; + error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high, + xrep_cow_mark_missing_staging, xc); + if (error) + goto out_sr; + + if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) { + error = xrep_cow_mark_file_range(xc, + xfs_rgbno_to_rtb(rtg, xc->next_bno), + xc->irec_startbno + xc->irec.br_blockcount - + xc->next_bno); + if (error) + goto out_sr; + } + + /* Mark any area has an rmap that isn't a COW staging extent. */ + rm_low.rm_startblock = xc->irec_startbno; + memset(&rm_high, 0xFF, sizeof(rm_high)); + rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; + error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high, + xrep_cow_mark_missing_staging_rmap, xc); + if (error) + goto out_sr; + + /* + * If userspace is forcing us to rebuild the CoW fork or someone + * turned on the debugging knob, replace everything in the + * CoW fork and then scan for staging extents in the refcountbt. + */ + if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || + XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { + error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, + xc->irec.br_blockcount); + if (error) + goto out_rtg; + } + +out_sr: + xchk_rtgroup_btcur_free(&sc->sr); + xchk_rtgroup_free(sc, &sc->sr); +out_rtg: + xfs_rtgroup_put(rtg); + return error; +} + /* * Allocate a replacement CoW staging extent of up to the given number of * blocks, and fill out the mapping. @@ -350,6 +440,32 @@ xrep_cow_alloc( return 0; } +/* + * Allocate a replacement rt CoW staging extent of up to the given number of + * blocks, and fill out the mapping. + */ +STATIC int +xrep_cow_alloc_rt( + struct xfs_scrub *sc, + xfs_extlen_t maxlen, + struct xrep_cow_extent *repl) +{ + xfs_rtxlen_t maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen); + int error; + + error = xfs_trans_reserve_more(sc->tp, 0, maxrtx); + if (error) + return error; + + error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false, + false, &repl->fsbno, &repl->len); + if (error) + return error; + + xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len); + return 0; +} + /* * Look up the current CoW fork mapping so that we only allocate enough to * replace a single mapping. If we don't find a mapping that covers the start @@ -467,7 +583,10 @@ xrep_cow_replace_range( */ alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN, nextoff - startoff); - error = xrep_cow_alloc(sc, alloc_len, &repl); + if (XFS_IS_REALTIME_INODE(sc->ip)) + error = xrep_cow_alloc_rt(sc, alloc_len, &repl); + else + error = xrep_cow_alloc(sc, alloc_len, &repl); if (error) return error; @@ -483,8 +602,12 @@ xrep_cow_replace_range( return error; /* Note the old CoW staging extents; we'll reap them all later. */ - error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, got.br_startblock, - repl.len); + if (XFS_IS_REALTIME_INODE(sc->ip)) + error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks, + got.br_startblock, repl.len); + else + error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, + got.br_startblock, repl.len); if (error) return error; @@ -540,8 +663,16 @@ xrep_bmap_cow( if (!ifp) return 0; - /* realtime files aren't supported yet */ - if (XFS_IS_REALTIME_INODE(sc->ip)) + /* + * Realtime files with large extent sizes are not supported because + * we could encounter an CoW mapping that has been partially written + * out *and* requires replacement, and there's no solution to that. + */ + if (xfs_inode_has_bigrtalloc(sc->ip)) + return -EOPNOTSUPP; + + /* Metadata inodes aren't supposed to have data on the rt volume. */ + if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip)) return -EOPNOTSUPP; /* @@ -562,7 +693,10 @@ xrep_bmap_cow( xc->sc = sc; xoff_bitmap_init(&xc->bad_fileoffs); - xfsb_bitmap_init(&xc->old_cowfork_fsblocks); + if (XFS_IS_REALTIME_INODE(sc->ip)) + xrtb_bitmap_init(&xc->old_cowfork_rtblocks); + else + xfsb_bitmap_init(&xc->old_cowfork_fsblocks); for_each_xfs_iext(ifp, &icur, &xc->irec) { if (xchk_should_terminate(sc, &error)) @@ -585,7 +719,10 @@ xrep_bmap_cow( if (xfs_bmap_is_written_extent(&xc->irec)) continue; - error = xrep_cow_find_bad(xc); + if (XFS_IS_REALTIME_INODE(sc->ip)) + error = xrep_cow_find_bad_rt(xc); + else + error = xrep_cow_find_bad(xc); if (error) goto out_bitmap; } @@ -600,13 +737,20 @@ xrep_bmap_cow( * by the refcount btree, not the inode, so it is correct to treat them * like inode metadata. */ - error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks, - &XFS_RMAP_OINFO_COW); + if (XFS_IS_REALTIME_INODE(sc->ip)) + error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks, + &XFS_RMAP_OINFO_COW); + else + error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks, + &XFS_RMAP_OINFO_COW); if (error) goto out_bitmap; out_bitmap: - xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks); + if (XFS_IS_REALTIME_INODE(sc->ip)) + xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks); + else + xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks); xoff_bitmap_destroy(&xc->bad_fileoffs); kfree(xc); return error; diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c index 534c7696e9a1..b32fb233cf84 100644 --- a/fs/xfs/scrub/reap.c +++ b/fs/xfs/scrub/reap.c @@ -34,6 +34,8 @@ #include "xfs_attr_remote.h" #include "xfs_defer.h" #include "xfs_metafile.h" +#include "xfs_rtgroup.h" +#include "xfs_rtrmap_btree.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -41,6 +43,7 @@ #include "scrub/bitmap.h" #include "scrub/agb_bitmap.h" #include "scrub/fsb_bitmap.h" +#include "scrub/rtb_bitmap.h" #include "scrub/reap.h" /* @@ -311,7 +314,7 @@ xreap_agextent_binval( } out: - trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp); + trace_xreap_agextent_binval(pag_group(sc->sa.pag), agbno, *aglenp); } /* @@ -370,7 +373,8 @@ xreap_agextent_select( out_found: *aglenp = len; - trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked); + trace_xreap_agextent_select(pag_group(sc->sa.pag), agbno, len, + *crosslinked); out_cur: xfs_btree_del_cursor(cur, error); return error; @@ -409,7 +413,8 @@ xreap_agextent_iter( * to run xfs_repair. */ if (crosslinked) { - trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp); + trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), agbno, + *aglenp); rs->force_roll = true; @@ -428,7 +433,7 @@ xreap_agextent_iter( *aglenp, rs->oinfo); } - trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp); + trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), agbno, *aglenp); /* * Invalidate as many buffers as we can, starting at agbno. If this @@ -679,6 +684,225 @@ xrep_reap_fsblocks( return 0; } +#ifdef CONFIG_XFS_RT +/* + * Figure out the longest run of blocks that we can dispose of with a single + * call. Cross-linked blocks should have their reverse mappings removed, but + * single-owner extents can be freed. Units are rt blocks, not rt extents. + */ +STATIC int +xreap_rgextent_select( + struct xreap_state *rs, + xfs_rgblock_t rgbno, + xfs_rgblock_t rgbno_next, + bool *crosslinked, + xfs_extlen_t *rglenp) +{ + struct xfs_scrub *sc = rs->sc; + struct xfs_btree_cur *cur; + xfs_rgblock_t bno = rgbno + 1; + xfs_extlen_t len = 1; + int error; + + /* + * Determine if there are any other rmap records covering the first + * block of this extent. If so, the block is crosslinked. + */ + cur = xfs_rtrmapbt_init_cursor(sc->tp, sc->sr.rtg); + error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo, + crosslinked); + if (error) + goto out_cur; + + /* + * Figure out how many of the subsequent blocks have the same crosslink + * status. + */ + while (bno < rgbno_next) { + bool also_crosslinked; + + error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo, + &also_crosslinked); + if (error) + goto out_cur; + + if (*crosslinked != also_crosslinked) + break; + + len++; + bno++; + } + + *rglenp = len; + trace_xreap_agextent_select(rtg_group(sc->sr.rtg), rgbno, len, + *crosslinked); +out_cur: + xfs_btree_del_cursor(cur, error); + return error; +} + +/* + * Dispose of as much of the beginning of this rtgroup extent as possible. + * The number of blocks disposed of will be returned in @rglenp. + */ +STATIC int +xreap_rgextent_iter( + struct xreap_state *rs, + xfs_rgblock_t rgbno, + xfs_extlen_t *rglenp, + bool crosslinked) +{ + struct xfs_scrub *sc = rs->sc; + xfs_rtblock_t rtbno; + int error; + + /* + * The only caller so far is CoW fork repair, so we only know how to + * unlink or free CoW staging extents. Here we don't have to worry + * about invalidating buffers! + */ + if (rs->oinfo != &XFS_RMAP_OINFO_COW) { + ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW); + return -EFSCORRUPTED; + } + ASSERT(rs->resv == XFS_AG_RESV_NONE); + + rtbno = xfs_rgbno_to_rtb(sc->sr.rtg, rgbno); + + /* + * If there are other rmappings, this block is cross linked and must + * not be freed. Remove the forward and reverse mapping and move on. + */ + if (crosslinked) { + trace_xreap_dispose_unmap_extent(rtg_group(sc->sr.rtg), rgbno, + *rglenp); + + xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp); + rs->deferred++; + return 0; + } + + trace_xreap_dispose_free_extent(rtg_group(sc->sr.rtg), rgbno, *rglenp); + + /* + * The CoW staging extent is not crosslinked. Use deferred work items + * to remove the refcountbt records (which removes the rmap records) + * and free the extent. We're not worried about the system going down + * here because log recovery walks the refcount btree to clean out the + * CoW staging extents. + */ + xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp); + error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL, + rs->resv, + XFS_FREE_EXTENT_REALTIME | + XFS_FREE_EXTENT_SKIP_DISCARD); + if (error) + return error; + + rs->deferred++; + return 0; +} + +#define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \ + XFS_RTGLOCK_RMAP | \ + XFS_RTGLOCK_REFCOUNT) + +/* + * Break a rt file metadata extent into sub-extents by fate (crosslinked, not + * crosslinked), and dispose of each sub-extent separately. The extent must + * be aligned to a realtime extent. + */ +STATIC int +xreap_rtmeta_extent( + uint64_t rtbno, + uint64_t len, + void *priv) +{ + struct xreap_state *rs = priv; + struct xfs_scrub *sc = rs->sc; + xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno); + xfs_rgblock_t rgbno_next = rgbno + len; + int error = 0; + + ASSERT(sc->ip != NULL); + ASSERT(!sc->sr.rtg); + + /* + * We're reaping blocks after repairing file metadata, which means that + * we have to init the xchk_ag structure ourselves. + */ + sc->sr.rtg = xfs_rtgroup_get(sc->mp, xfs_rtb_to_rgno(sc->mp, rtbno)); + if (!sc->sr.rtg) + return -EFSCORRUPTED; + + xfs_rtgroup_lock(sc->sr.rtg, XREAP_RTGLOCK_ALL); + + while (rgbno < rgbno_next) { + xfs_extlen_t rglen; + bool crosslinked; + + error = xreap_rgextent_select(rs, rgbno, rgbno_next, + &crosslinked, &rglen); + if (error) + goto out_unlock; + + error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked); + if (error) + goto out_unlock; + + if (xreap_want_defer_finish(rs)) { + error = xfs_defer_finish(&sc->tp); + if (error) + goto out_unlock; + xreap_defer_finish_reset(rs); + } else if (xreap_want_roll(rs)) { + error = xfs_trans_roll_inode(&sc->tp, sc->ip); + if (error) + goto out_unlock; + xreap_reset(rs); + } + + rgbno += rglen; + } + +out_unlock: + xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL); + xfs_rtgroup_put(sc->sr.rtg); + sc->sr.rtg = NULL; + return error; +} + +/* + * Dispose of every block of every rt metadata extent in the bitmap. + * Do not use this to dispose of the mappings in an ondisk inode fork. + */ +int +xrep_reap_rtblocks( + struct xfs_scrub *sc, + struct xrtb_bitmap *bitmap, + const struct xfs_owner_info *oinfo) +{ + struct xreap_state rs = { + .sc = sc, + .oinfo = oinfo, + .resv = XFS_AG_RESV_NONE, + }; + int error; + + ASSERT(xfs_has_rmapbt(sc->mp)); + ASSERT(sc->ip != NULL); + + error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs); + if (error) + return error; + + if (xreap_dirty(&rs)) + return xrep_defer_finish(sc); + + return 0; +} +#endif /* CONFIG_XFS_RT */ + /* * Dispose of every block of an old metadata btree that used to be rooted in a * metadata directory file. @@ -771,7 +995,8 @@ xreap_bmapi_select( } imap->br_blockcount = len; - trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked); + trace_xreap_bmapi_select(pag_group(sc->sa.pag), agbno, len, + *crosslinked); out_cur: xfs_btree_del_cursor(cur, error); return error; @@ -910,7 +1135,8 @@ xreap_bmapi_binval( } out: - trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount); + trace_xreap_bmapi_binval(pag_group(sc->sa.pag), agbno, + imap->br_blockcount); return 0; } @@ -937,7 +1163,7 @@ xrep_reap_bmapi_iter( * anybody else who thinks they own the block, even though that * runs the risk of stale buffer warnings in the future. */ - trace_xreap_dispose_unmap_extent(sc->sa.pag, + trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock), imap->br_blockcount); @@ -960,7 +1186,7 @@ xrep_reap_bmapi_iter( * by a block starting before the first block of the extent but overlap * anyway. */ - trace_xreap_dispose_free_extent(sc->sa.pag, + trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock), imap->br_blockcount); diff --git a/fs/xfs/scrub/reap.h b/fs/xfs/scrub/reap.h index 70e5e6bbb8d3..4c8f62701fb3 100644 --- a/fs/xfs/scrub/reap.h +++ b/fs/xfs/scrub/reap.h @@ -17,6 +17,13 @@ int xrep_reap_ifork(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork); int xrep_reap_metadir_fsblocks(struct xfs_scrub *sc, struct xfsb_bitmap *bitmap); +#ifdef CONFIG_XFS_RT +int xrep_reap_rtblocks(struct xfs_scrub *sc, struct xrtb_bitmap *bitmap, + const struct xfs_owner_info *oinfo); +#else +# define xrep_reap_rtblocks(...) (-EOPNOTSUPP) +#endif /* CONFIG_XFS_RT */ + /* Buffer cache scan context. */ struct xrep_bufscan { /* Disk address for the buffers we want to scan. */ diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 8f8f18b48a44..823c00d1a502 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -52,6 +52,7 @@ struct xbitmap; struct xagb_bitmap; struct xrgb_bitmap; struct xfsb_bitmap; +struct xrtb_bitmap; int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags); diff --git a/fs/xfs/scrub/rtb_bitmap.h b/fs/xfs/scrub/rtb_bitmap.h new file mode 100644 index 000000000000..1313ef605511 --- /dev/null +++ b/fs/xfs/scrub/rtb_bitmap.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_RTB_BITMAP_H__ +#define __XFS_SCRUB_RTB_BITMAP_H__ + +/* Bitmaps, but for type-checked for xfs_rtblock_t */ + +struct xrtb_bitmap { + struct xbitmap64 rtbitmap; +}; + +static inline void xrtb_bitmap_init(struct xrtb_bitmap *bitmap) +{ + xbitmap64_init(&bitmap->rtbitmap); +} + +static inline void xrtb_bitmap_destroy(struct xrtb_bitmap *bitmap) +{ + xbitmap64_destroy(&bitmap->rtbitmap); +} + +static inline int xrtb_bitmap_set(struct xrtb_bitmap *bitmap, + xfs_rtblock_t start, xfs_filblks_t len) +{ + return xbitmap64_set(&bitmap->rtbitmap, start, len); +} + +static inline int xrtb_bitmap_walk(struct xrtb_bitmap *bitmap, + xbitmap64_walk_fn fn, void *priv) +{ + return xbitmap64_walk(&bitmap->rtbitmap, fn, priv); +} + +#endif /* __XFS_SCRUB_RTB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 56811862aa82..d7c4ced47c15 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1964,32 +1964,36 @@ DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup); #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) DECLARE_EVENT_CLASS(xrep_extent_class, - TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, + TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(pag, agbno, len), + TP_ARGS(xg, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) + __field(enum xfs_group_type, type) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) ), TP_fast_assign( - __entry->dev = pag_mount(pag)->m_super->s_dev; - __entry->agno = pag_agno(pag); + __entry->dev = xg->xg_mount->m_super->s_dev; + __entry->type = xg->xg_type; + __entry->agno = xg->xg_gno; __entry->agbno = agbno; __entry->len = len; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x", + TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->agno, + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->agbno, __entry->len) ); #define DEFINE_REPAIR_EXTENT_EVENT(name) \ DEFINE_EVENT(xrep_extent_class, name, \ - TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \ + TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \ xfs_extlen_t len), \ - TP_ARGS(pag, agbno, len)) + TP_ARGS(xg, agbno, len)) DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent); DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent); DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval); @@ -1997,35 +2001,39 @@ DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval); DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert); DECLARE_EVENT_CLASS(xrep_reap_find_class, - TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, + TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, xfs_extlen_t len, bool crosslinked), - TP_ARGS(pag, agbno, len, crosslinked), + TP_ARGS(xg, agbno, len, crosslinked), TP_STRUCT__entry( __field(dev_t, dev) + __field(enum xfs_group_type, type) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) __field(bool, crosslinked) ), TP_fast_assign( - __entry->dev = pag_mount(pag)->m_super->s_dev; - __entry->agno = pag_agno(pag); + __entry->dev = xg->xg_mount->m_super->s_dev; + __entry->type = xg->xg_type; + __entry->agno = xg->xg_gno; __entry->agbno = agbno; __entry->len = len; __entry->crosslinked = crosslinked; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x crosslinked %d", + TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x crosslinked %d", MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->agno, + __print_symbolic(__entry->type, XG_TYPE_STRINGS), __entry->agbno, __entry->len, __entry->crosslinked ? 1 : 0) ); #define DEFINE_REPAIR_REAP_FIND_EVENT(name) \ DEFINE_EVENT(xrep_reap_find_class, name, \ - TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \ + TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \ xfs_extlen_t len, bool crosslinked), \ - TP_ARGS(pag, agbno, len, crosslinked)) + TP_ARGS(xg, agbno, len, crosslinked)) DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select); DEFINE_REPAIR_REAP_FIND_EVENT(xreap_bmapi_select); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a5de5405800a..7135a6717a9e 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -594,7 +594,7 @@ xfs_rtalloc_sumlevel( * specified. If we don't get maxlen then use prod to trim * the length, if given. The lengths are all in rtextents. */ -STATIC int +static int xfs_rtallocate_extent_size( struct xfs_rtalloc_args *args, xfs_rtxlen_t minlen, /* minimum length to allocate */ @@ -1958,7 +1958,7 @@ out_unlock: goto out_release; } -static int +int xfs_rtallocate_rtgs( struct xfs_trans *tp, xfs_fsblock_t bno_hint, diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 9044f7226ab6..0d95b29092c9 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -77,4 +77,9 @@ xfs_growfs_check_rtgeom(const struct xfs_mount *mp, } #endif /* CONFIG_XFS_RT */ +int xfs_rtallocate_rtgs(struct xfs_trans *tp, xfs_fsblock_t bno_hint, + xfs_rtxlen_t minlen, xfs_rtxlen_t maxlen, xfs_rtxlen_t prod, + bool wasdel, bool initial_user_data, xfs_rtblock_t *bno, + xfs_extlen_t *blen); + #endif /* __XFS_RTALLOC_H__ */ -- cgit v1.2.3 From 155debbe7e627baf6439e75f15856d40ec115c5d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:21:17 -0800 Subject: xfs: enable realtime reflink Enable reflink for realtime devices, as long as the realtime allocation unit is a single fsblock. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 25 +++++++++++++++++++++++++ fs/xfs/xfs_reflink.h | 2 ++ fs/xfs/xfs_rtalloc.c | 7 +++++-- fs/xfs/xfs_super.c | 6 ++++-- 4 files changed, 36 insertions(+), 4 deletions(-) (limited to 'fs/xfs/xfs_rtalloc.c') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d9b33e22c176..59f7fc16eb80 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1822,3 +1822,28 @@ out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; } + +/* + * Can we use reflink with this realtime extent size? Note that we don't check + * for rblocks > 0 here because this can be called as part of attaching a new + * rt section. + */ +bool +xfs_reflink_supports_rextsize( + struct xfs_mount *mp, + unsigned int rextsize) +{ + /* reflink on the realtime device requires rtgroups */ + if (!xfs_has_rtgroups(mp)) + return false; + + /* + * Reflink doesn't support rt extent size larger than a single fsblock + * because we would have to perform CoW-around for unaligned write + * requests to guarantee that we always remap entire rt extents. + */ + if (rextsize != 1) + return false; + + return true; +} diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 3bfd7ab9e114..cc4e92278279 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -62,4 +62,6 @@ extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in, extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen, xfs_extlen_t cowextsize, unsigned int remap_flags); +bool xfs_reflink_supports_rextsize(struct xfs_mount *mp, unsigned int rextsize); + #endif /* __XFS_REFLINK_H */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 7135a6717a9e..d8e6d073d64d 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -32,6 +32,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_rtrefcount_btree.h" +#include "xfs_reflink.h" /* * Return whether there are any free extents in the size range given @@ -1292,8 +1293,10 @@ xfs_growfs_rt( goto out_unlock; if (xfs_has_quota(mp)) goto out_unlock; - } - if (xfs_has_reflink(mp)) + if (xfs_has_reflink(mp)) + goto out_unlock; + } else if (xfs_has_reflink(mp) && + !xfs_reflink_supports_rextsize(mp, in->extsize)) goto out_unlock; error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ecd5a9f444d8..7c3f996cd39e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1754,9 +1754,11 @@ xfs_fs_fill_super( xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); if (xfs_has_reflink(mp)) { - if (mp->m_sb.sb_rblocks) { + if (xfs_has_realtime(mp) && + !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) { xfs_alert(mp, - "reflink not compatible with realtime device!"); + "reflink not compatible with realtime extent size %u!", + mp->m_sb.sb_rextsize); error = -EINVAL; goto out_filestream_unmount; } -- cgit v1.2.3