Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r--  fs/xfs/scrub/agheader.c              79
-rw-r--r--  fs/xfs/scrub/agheader_repair.c        8
-rw-r--r--  fs/xfs/scrub/alloc_repair.c           5
-rw-r--r--  fs/xfs/scrub/bmap.c                 130
-rw-r--r--  fs/xfs/scrub/bmap_repair.c          148
-rw-r--r--  fs/xfs/scrub/btree.c                  2
-rw-r--r--  fs/xfs/scrub/common.c               177
-rw-r--r--  fs/xfs/scrub/common.h                33
-rw-r--r--  fs/xfs/scrub/cow_repair.c           180
-rw-r--r--  fs/xfs/scrub/dir_repair.c             8
-rw-r--r--  fs/xfs/scrub/fscounters.c            31
-rw-r--r--  fs/xfs/scrub/fscounters_repair.c     12
-rw-r--r--  fs/xfs/scrub/health.c                59
-rw-r--r--  fs/xfs/scrub/ialloc.c                 4
-rw-r--r--  fs/xfs/scrub/inode.c                 48
-rw-r--r--  fs/xfs/scrub/inode_repair.c         212
-rw-r--r--  fs/xfs/scrub/metapath.c              78
-rw-r--r--  fs/xfs/scrub/newbt.c                 44
-rw-r--r--  fs/xfs/scrub/newbt.h                  1
-rw-r--r--  fs/xfs/scrub/nlinks.c                 8
-rw-r--r--  fs/xfs/scrub/nlinks_repair.c          4
-rw-r--r--  fs/xfs/scrub/orphanage.c             16
-rw-r--r--  fs/xfs/scrub/parent_repair.c         12
-rw-r--r--  fs/xfs/scrub/quota.c                  8
-rw-r--r--  fs/xfs/scrub/quota_repair.c           2
-rw-r--r--  fs/xfs/scrub/quotacheck.c             4
-rw-r--r--  fs/xfs/scrub/rcbag_btree.c           38
-rw-r--r--  fs/xfs/scrub/reap.c                 291
-rw-r--r--  fs/xfs/scrub/reap.h                   9
-rw-r--r--  fs/xfs/scrub/refcount.c               4
-rw-r--r--  fs/xfs/scrub/refcount_repair.c        6
-rw-r--r--  fs/xfs/scrub/repair.c               240
-rw-r--r--  fs/xfs/scrub/repair.h                39
-rw-r--r--  fs/xfs/scrub/rgb_bitmap.h            37
-rw-r--r--  fs/xfs/scrub/rgsuper.c                6
-rw-r--r--  fs/xfs/scrub/rmap_repair.c          105
-rw-r--r--  fs/xfs/scrub/rtb_bitmap.h            37
-rw-r--r--  fs/xfs/scrub/rtbitmap.c              86
-rw-r--r--  fs/xfs/scrub/rtbitmap.h              55
-rw-r--r--  fs/xfs/scrub/rtbitmap_repair.c      451
-rw-r--r--  fs/xfs/scrub/rtrefcount.c           661
-rw-r--r--  fs/xfs/scrub/rtrefcount_repair.c    761
-rw-r--r--  fs/xfs/scrub/rtrmap.c               323
-rw-r--r--  fs/xfs/scrub/rtrmap_repair.c        981
-rw-r--r--  fs/xfs/scrub/rtsummary.c             17
-rw-r--r--  fs/xfs/scrub/rtsummary_repair.c       3
-rw-r--r--  fs/xfs/scrub/scrub.c                 39
-rw-r--r--  fs/xfs/scrub/scrub.h                 34
-rw-r--r--  fs/xfs/scrub/stats.c                  2
-rw-r--r--  fs/xfs/scrub/symlink_repair.c         3
-rw-r--r--  fs/xfs/scrub/tempexch.h               2
-rw-r--r--  fs/xfs/scrub/tempfile.c              43
-rw-r--r--  fs/xfs/scrub/trace.c                  1
-rw-r--r--  fs/xfs/scrub/trace.h                286
54 files changed, 5441 insertions, 432 deletions
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 61f80a6410c7..303374df44bd 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -60,6 +60,34 @@ xchk_superblock_xref(
}
/*
+ * Calculate the ondisk superblock size in bytes given the feature set of the
+ * mounted filesystem (aka the primary sb). This is subtly different from
+ * the logic in xfs_repair, which computes the size of a secondary sb given the
+ * featureset listed in the secondary sb.
+ */
+STATIC size_t
+xchk_superblock_ondisk_size(
+ struct xfs_mount *mp)
+{
+ if (xfs_has_zoned(mp))
+ return offsetofend(struct xfs_dsb, sb_rtreserved);
+ if (xfs_has_metadir(mp))
+ return offsetofend(struct xfs_dsb, sb_pad);
+ if (xfs_has_metauuid(mp))
+ return offsetofend(struct xfs_dsb, sb_meta_uuid);
+ if (xfs_has_crc(mp))
+ return offsetofend(struct xfs_dsb, sb_lsn);
+ if (xfs_sb_version_hasmorebits(&mp->m_sb))
+ return offsetofend(struct xfs_dsb, sb_bad_features2);
+ if (xfs_has_logv2(mp))
+ return offsetofend(struct xfs_dsb, sb_logsunit);
+ if (xfs_has_sector(mp))
+ return offsetofend(struct xfs_dsb, sb_logsectsize);
+ /* only support dirv2 or more recent */
+ return offsetofend(struct xfs_dsb, sb_dirblklog);
+}
+
+/*
* Scrub the filesystem superblock.
*
* Note: We do /not/ attempt to check AG 0's superblock. Mount is
@@ -75,6 +103,7 @@ xchk_superblock(
struct xfs_buf *bp;
struct xfs_dsb *sb;
struct xfs_perag *pag;
+ size_t sblen;
xfs_agnumber_t agno;
uint32_t v2_ok;
__be32 features_mask;
@@ -145,8 +174,11 @@ xchk_superblock(
xchk_block_set_preen(sc, bp);
if (xfs_has_metadir(sc->mp)) {
- if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
- xchk_block_set_preen(sc, bp);
+ if (sb->sb_rbmino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_rsumino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
} else {
if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
xchk_block_set_preen(sc, bp);
@@ -229,7 +261,13 @@ xchk_superblock(
* sb_icount, sb_ifree, sb_fdblocks, sb_frexents
*/
- if (!xfs_has_metadir(mp)) {
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_uquotino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_gquotino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+ } else {
if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
xchk_block_set_preen(sc, bp);
@@ -281,15 +319,8 @@ xchk_superblock(
if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
xchk_block_set_corrupt(sc, bp);
- if (xfs_has_metadir(mp)) {
- if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
- xchk_block_set_corrupt(sc, bp);
- if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
- xchk_block_set_preen(sc, bp);
- } else {
- if (sb->sb_features2 != sb->sb_bad_features2)
- xchk_block_set_preen(sc, bp);
- }
+ if (sb->sb_features2 != sb->sb_bad_features2)
+ xchk_block_set_preen(sc, bp);
}
/* Check sb_features2 flags that are set at mkfs time. */
@@ -351,7 +382,10 @@ xchk_superblock(
if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
xchk_block_set_corrupt(sc, bp);
- if (!xfs_has_metadir(mp)) {
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_pquotino != cpu_to_be64(0))
+ xchk_block_set_corrupt(sc, bp);
+ } else {
if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
xchk_block_set_preen(sc, bp);
}
@@ -366,16 +400,25 @@ xchk_superblock(
}
if (xfs_has_metadir(mp)) {
+ if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
+ xchk_block_set_preen(sc, bp);
+
if (sb->sb_rgcount != cpu_to_be32(mp->m_sb.sb_rgcount))
xchk_block_set_corrupt(sc, bp);
if (sb->sb_rgextents != cpu_to_be32(mp->m_sb.sb_rgextents))
xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
+ xchk_block_set_corrupt(sc, bp);
+
+ if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
+ xchk_block_set_corrupt(sc, bp);
}
/* Everything else must be zero. */
- if (memchr_inv(sb + 1, 0,
- BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
+ sblen = xchk_superblock_ondisk_size(mp);
+ if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen))
xchk_block_set_corrupt(sc, bp);
xchk_superblock_xref(sc, bp);
@@ -458,7 +501,7 @@ xchk_agf_xref_btreeblks(
{
struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
struct xfs_mount *mp = sc->mp;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
xfs_agblock_t btreeblks;
int error;
@@ -507,7 +550,7 @@ xchk_agf_xref_refcblks(
struct xfs_scrub *sc)
{
struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error;
if (!sc->sa.refc_cur)
@@ -840,7 +883,7 @@ xchk_agi_xref_fiblocks(
struct xfs_scrub *sc)
{
struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error = 0;
if (!xfs_has_inobtcounts(sc->mp))
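[The trailing-bytes check added above sizes the ondisk superblock by the newest feature bit present and requires everything past that point in the sector buffer to be zero. A minimal standalone sketch of the same offsetofend() pattern, using a hypothetical demo_sb layout rather than the real xfs_dsb:]

#include <stddef.h>

#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

/* hypothetical ondisk layout; newer feature fields come last */
struct demo_sb {
	unsigned long long	magic;		/* always present */
	unsigned long long	lsn;		/* present if CRC feature */
	unsigned char		pad[16];	/* present if metadir feature */
};

/* size implied by the feature set; newest feature checked first */
static size_t
demo_sb_size(int has_metadir, int has_crc)
{
	if (has_metadir)
		return offsetofend(struct demo_sb, pad);
	if (has_crc)
		return offsetofend(struct demo_sb, lsn);
	return offsetofend(struct demo_sb, magic);
}

/* open-coded memchr_inv(): nonzero if any byte past sblen is set */
static int
demo_sb_tail_dirty(const void *buf, size_t buflen, int has_metadir,
		   int has_crc)
{
	size_t sblen = demo_sb_size(has_metadir, has_crc);
	const unsigned char *p = buf;
	size_t i;

	for (i = sblen; i < buflen; i++)
		if (p[i] != 0)
			return 1;
	return 0;
}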
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 0fad0baaba2f..cd6f0223879f 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -256,7 +256,7 @@ xrep_agf_calc_from_btrees(
struct xfs_agf *agf = agf_bp->b_addr;
struct xfs_mount *mp = sc->mp;
xfs_agblock_t btreeblks;
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
int error;
/* Update the AGF counters from the bnobt. */
@@ -647,7 +647,7 @@ xrep_agfl_fill(
xfs_agblock_t agbno = start;
int error;
- trace_xrep_agfl_insert(sc->sa.pag, agbno, len);
+ trace_xrep_agfl_insert(pag_group(sc->sa.pag), agbno, len);
while (agbno < start + len && af->fl_off < af->flcount)
af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++);
@@ -946,7 +946,7 @@ xrep_agi_calc_from_btrees(
if (error)
goto err;
if (xfs_has_inobtcounts(mp)) {
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
error = xfs_btree_count_blocks(cur, &blocks);
if (error)
@@ -959,7 +959,7 @@ xrep_agi_calc_from_btrees(
agi->agi_freecount = cpu_to_be32(freecount);
if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) {
- xfs_agblock_t blocks;
+ xfs_filblks_t blocks;
cur = xfs_finobt_init_cursor(sc->sa.pag, sc->tp, agi_bp);
error = xfs_btree_count_blocks(cur, &blocks);
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index 0433363a90b6..bed6a09aa791 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -542,8 +542,9 @@ xrep_abt_dispose_one(
/* Add a deferred rmap for each extent we used. */
if (resv->used > 0)
- xfs_rmap_alloc_extent(sc->tp, pag_agno(pag), resv->agbno,
- resv->used, XFS_RMAP_OWN_AG);
+ xfs_rmap_alloc_extent(sc->tp, false,
+ xfs_agbno_to_fsb(pag, resv->agbno), resv->used,
+ XFS_RMAP_OWN_AG);
/*
* For each reserved btree block we didn't use, add it to the free
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 7e00312225ed..4f1e2574660d 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -21,6 +21,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_rtgroup.h"
#include "xfs_health.h"
+#include "xfs_rtalloc.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -143,15 +145,22 @@ static inline bool
xchk_bmap_get_rmap(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno,
+ xfs_agblock_t bno,
uint64_t owner,
struct xfs_rmap_irec *rmap)
{
+ struct xfs_btree_cur **curp = &info->sc->sa.rmap_cur;
xfs_fileoff_t offset;
unsigned int rflags = 0;
int has_rmap;
int error;
+ if (xfs_ifork_is_realtime(info->sc->ip, info->whichfork))
+ curp = &info->sc->sr.rmap_cur;
+
+ if (*curp == NULL)
+ return false;
+
if (info->whichfork == XFS_ATTR_FORK)
rflags |= XFS_RMAP_ATTR_FORK;
if (irec->br_state == XFS_EXT_UNWRITTEN)
@@ -172,13 +181,13 @@ xchk_bmap_get_rmap(
* range rmap lookup to make sure we get the correct owner/offset.
*/
if (info->is_shared) {
- error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
- owner, offset, rflags, rmap, &has_rmap);
+ error = xfs_rmap_lookup_le_range(*curp, bno, owner, offset,
+ rflags, rmap, &has_rmap);
} else {
- error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
- owner, offset, rflags, rmap, &has_rmap);
+ error = xfs_rmap_lookup_le(*curp, bno, owner, offset,
+ rflags, rmap, &has_rmap);
}
- if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
+ if (!xchk_should_check_xref(info->sc, &error, curp))
return false;
if (!has_rmap)
@@ -192,29 +201,29 @@ STATIC void
xchk_bmap_xref_rmap(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno)
+ xfs_agblock_t bno)
{
struct xfs_rmap_irec rmap;
unsigned long long rmap_end;
uint64_t owner = info->sc->ip->i_ino;
- if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
+ if (xchk_skip_xref(info->sc->sm))
return;
/* Find the rmap record for this irec. */
- if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
return;
/*
* The rmap must be an exact match for this incore file mapping record,
* which may have arisen from multiple ondisk records.
*/
- if (rmap.rm_startblock != agbno)
+ if (rmap.rm_startblock != bno)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
- if (rmap_end != agbno + irec->br_blockcount)
+ if (rmap_end != bno + irec->br_blockcount)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -259,7 +268,7 @@ STATIC void
xchk_bmap_xref_rmap_cow(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno)
+ xfs_agblock_t bno)
{
struct xfs_rmap_irec rmap;
unsigned long long rmap_end;
@@ -269,7 +278,7 @@ xchk_bmap_xref_rmap_cow(
return;
/* Find the rmap record for this irec. */
- if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
return;
/*
@@ -277,12 +286,12 @@ xchk_bmap_xref_rmap_cow(
* can start before and end after the physical space allocated to this
* mapping. There are no offsets to check.
*/
- if (rmap.rm_startblock > agbno)
+ if (rmap.rm_startblock > bno)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
- if (rmap_end < agbno + irec->br_blockcount)
+ if (rmap_end < bno + irec->br_blockcount)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -315,6 +324,8 @@ xchk_bmap_rt_iextent_xref(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
+ struct xfs_owner_info oinfo;
+ xfs_rgblock_t rgbno;
int error;
error = xchk_rtgroup_init_existing(info->sc,
@@ -324,10 +335,46 @@ xchk_bmap_rt_iextent_xref(
irec->br_startoff, &error))
return;
- xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL);
+ error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL);
+ if (!xchk_fblock_process_error(info->sc, info->whichfork,
+ irec->br_startoff, &error))
+ goto out_free;
+
xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
irec->br_blockcount);
+ if (!xfs_has_rtrmapbt(info->sc->mp))
+ goto out_cur;
+
+ rgbno = xfs_rtb_to_rgbno(info->sc->mp, irec->br_startblock);
+
+ switch (info->whichfork) {
+ case XFS_DATA_FORK:
+ xchk_bmap_xref_rmap(info, irec, rgbno);
+ if (!xfs_is_reflink_inode(info->sc->ip)) {
+ xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
+ info->whichfork, irec->br_startoff);
+ xchk_xref_is_only_rt_owned_by(info->sc, rgbno,
+ irec->br_blockcount, &oinfo);
+ xchk_xref_is_not_rt_shared(info->sc, rgbno,
+ irec->br_blockcount);
+ }
+ xchk_xref_is_not_rt_cow_staging(info->sc, rgbno,
+ irec->br_blockcount);
+ break;
+ case XFS_COW_FORK:
+ xchk_bmap_xref_rmap_cow(info, irec, rgbno);
+ xchk_xref_is_only_rt_owned_by(info->sc, rgbno,
+ irec->br_blockcount, &XFS_RMAP_OINFO_COW);
+ xchk_xref_is_rt_cow_staging(info->sc, rgbno,
+ irec->br_blockcount);
+ xchk_xref_is_not_rt_shared(info->sc, rgbno,
+ irec->br_blockcount);
+ break;
+ }
+out_cur:
+ xchk_rtgroup_btcur_free(&info->sc->sr);
+out_free:
xchk_rtgroup_free(info->sc, &info->sc->sr);
}
@@ -614,8 +661,7 @@ xchk_bmap_check_rmap(
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
if (irec.br_startblock !=
- xfs_agbno_to_fsb(to_perag(cur->bc_group),
- check_rec.rm_startblock))
+ xfs_gbno_to_fsb(cur->bc_group, check_rec.rm_startblock))
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
if (irec.br_blockcount > check_rec.rm_blockcount)
@@ -669,6 +715,30 @@ xchk_bmap_check_ag_rmaps(
return error;
}
+/* Make sure each rt rmap has a corresponding bmbt entry. */
+STATIC int
+xchk_bmap_check_rt_rmaps(
+ struct xfs_scrub *sc,
+ struct xfs_rtgroup *rtg)
+{
+ struct xchk_bmap_check_rmap_info sbcri;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg);
+
+ sbcri.sc = sc;
+ sbcri.whichfork = XFS_DATA_FORK;
+ error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
+ if (error == -ECANCELED)
+ error = 0;
+
+ xfs_btree_del_cursor(cur, error);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ return error;
+}
+
/*
* Decide if we want to scan the reverse mappings to determine if the attr
* fork /really/ has zero space mappings.
@@ -723,10 +793,6 @@ xchk_bmap_check_empty_datafork(
{
struct xfs_ifork *ifp = &ip->i_df;
- /* Don't support realtime rmap checks yet. */
- if (XFS_IS_REALTIME_INODE(ip))
- return false;
-
/*
* If the dinode repair found a bad data fork, it will reset the fork
* to extents format with zero records and wait for the this scrubber
@@ -777,6 +843,21 @@ xchk_bmap_check_rmaps(
struct xfs_perag *pag = NULL;
int error;
+ if (xfs_ifork_is_realtime(sc->ip, whichfork)) {
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ error = xchk_bmap_check_rt_rmaps(sc, rtg);
+ if (error ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+
+ return 0;
+ }
+
while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
if (error ||
@@ -957,8 +1038,8 @@ xchk_bmap(
switch (whichfork) {
case XFS_COW_FORK:
- /* No CoW forks on non-reflink filesystems. */
- if (!xfs_has_reflink(mp)) {
+ /* No CoW forks on filesystems that don't support out of place writes */
+ if (!xfs_has_reflink(mp) && !xfs_has_zoned(mp)) {
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
return 0;
}
@@ -983,6 +1064,7 @@ xchk_bmap(
case XFS_DINODE_FMT_UUID:
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_META_BTREE:
/* No mappings to check. */
if (whichfork == XFS_COW_FORK)
xchk_fblock_set_corrupt(sc, whichfork, 0);
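[The rtgroup walks added in this file follow the usual XFS group-iterator idiom: xfs_rtgroup_next() returns each group with a reference held and drops it on the next iteration, so a caller that returns early must call xfs_rtgroup_rele() itself. A hedged sketch of that shape; check_one_rtg is a hypothetical callback, not part of this patch:]

STATIC int
walk_all_rtgroups(
	struct xfs_scrub	*sc)
{
	struct xfs_rtgroup	*rtg = NULL;
	int			error;

	while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
		error = check_one_rtg(sc, rtg);	/* hypothetical */
		if (error) {
			/* iterator won't run again, drop our reference */
			xfs_rtgroup_rele(rtg);
			return error;
		}
	}
	return 0;
}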
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 7c4955482641..1084213b8e9b 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -25,11 +25,13 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
+#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -99,14 +101,21 @@ xrep_bmap_discover_shared(
xfs_filblks_t blockcount)
{
struct xfs_scrub *sc = rb->sc;
+ struct xfs_btree_cur *cur;
xfs_agblock_t agbno;
xfs_agblock_t fbno;
xfs_extlen_t flen;
int error;
- agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
- error = xfs_refcount_find_shared(sc->sa.refc_cur, agbno, blockcount,
- &fbno, &flen, false);
+ if (XFS_IS_REALTIME_INODE(sc->ip)) {
+ agbno = xfs_rtb_to_rgbno(sc->mp, startblock);
+ cur = sc->sr.refc_cur;
+ } else {
+ agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
+ cur = sc->sa.refc_cur;
+ }
+ error = xfs_refcount_find_shared(cur, agbno, blockcount, &fbno, &flen,
+ false);
if (error)
return error;
@@ -359,6 +368,114 @@ xrep_bmap_scan_ag(
return error;
}
+#ifdef CONFIG_XFS_RT
+/* Check for any obvious errors or conflicts in the file mapping. */
+STATIC int
+xrep_bmap_check_rtfork_rmap(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec)
+{
+ /* xattr extents are never stored on realtime devices */
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ return -EFSCORRUPTED;
+
+ /* bmbt blocks are never stored on realtime devices */
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ return -EFSCORRUPTED;
+
+ /* Data extents for non-rt files are never stored on the rt device. */
+ if (!XFS_IS_REALTIME_INODE(sc->ip))
+ return -EFSCORRUPTED;
+
+ /* Check the file offsets and physical extents. */
+ if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Check that this is within the rtgroup. */
+ if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
+ rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ return xrep_require_rtext_inuse(sc, rec->rm_startblock,
+ rec->rm_blockcount);
+}
+
+/* Record realtime extents that belong to this inode's fork. */
+STATIC int
+xrep_bmap_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_bmap *rb = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rb->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != rb->sc->ip->i_ino)
+ return 0;
+
+ error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
+ if (error)
+ return error;
+
+ /*
+ * Record all blocks allocated to this file even if the extent isn't
+ * for the fork we're rebuilding so that we can reset di_nblocks later.
+ */
+ rb->nblocks += rec->rm_blockcount;
+
+ /* If this rmap isn't for the fork we want, we're done. */
+ if (rb->whichfork == XFS_DATA_FORK &&
+ (rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+ if (rb->whichfork == XFS_ATTR_FORK &&
+ !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+
+ return xrep_bmap_from_rmap(rb, rec->rm_offset,
+ xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
+ rec->rm_startblock),
+ rec->rm_blockcount,
+ rec->rm_flags & XFS_RMAP_UNWRITTEN);
+}
+
+/* Scan the realtime reverse mappings to build the new extent map. */
+STATIC int
+xrep_bmap_scan_rtgroup(
+ struct xrep_bmap *rb,
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_scrub *sc = rb->sc;
+ int error;
+
+ if (!xfs_has_rtrmapbt(sc->mp))
+ return 0;
+
+ error = xrep_rtgroup_init(sc, rtg, &sc->sr,
+ XFS_RTGLOCK_RMAP |
+ XFS_RTGLOCK_REFCOUNT |
+ XFS_RTGLOCK_BITMAP_SHARED);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
+ xchk_rtgroup_btcur_free(&sc->sr);
+ xchk_rtgroup_free(sc, &sc->sr);
+ return error;
+}
+#else
+static inline int
+xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
+{
+ return -EFSCORRUPTED;
+}
+#endif
+
/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
@@ -410,6 +527,22 @@ xrep_bmap_find_mappings(
struct xfs_perag *pag = NULL;
int error = 0;
+ /*
+ * Iterate the rtrmaps for extents. Metadata files never have content
+ * on the realtime device, so there's no need to scan them.
+ */
+ if (!xfs_is_metadir_inode(sc->ip)) {
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ error = xrep_bmap_scan_rtgroup(rb, rtg);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+ }
+
/* Iterate the rmaps for extents. */
while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xrep_bmap_scan_ag(rb, pag);
@@ -731,6 +864,7 @@ xrep_bmap_check_inputs(
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
case XFS_DINODE_FMT_UUID:
+ case XFS_DINODE_FMT_META_BTREE:
return -ECANCELED;
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -753,10 +887,6 @@ xrep_bmap_check_inputs(
return -EINVAL;
}
- /* Don't know how to rebuild realtime data forks. */
- if (XFS_IS_REALTIME_INODE(sc->ip))
- return -EOPNOTSUPP;
-
return 0;
}
@@ -782,10 +912,6 @@ xrep_bmap_init_reflink_scan(
if (whichfork != XFS_DATA_FORK)
return RLS_IRRELEVANT;
- /* cannot share realtime extents */
- if (XFS_IS_REALTIME_INODE(sc->ip))
- return RLS_IRRELEVANT;
-
return RLS_UNKNOWN;
}
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index fe678a0438bc..cd6f0ff382a7 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -306,7 +306,7 @@ xchk_btree_block_check_sibling(
if (pbp)
xchk_buffer_recheck(bs->sc, pbp);
- if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
+ if (xfs_btree_cmp_two_ptrs(cur, pp, sibling))
xchk_btree_set_corrupt(bs->sc, cur, level);
out:
xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 5cbd94b56582..2ef7742be7d3 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -35,6 +35,9 @@
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_bmap_util.h"
+#include "xfs_rtrefcount_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -719,20 +722,111 @@ xchk_rtgroup_init(
return 0;
}
-void
+/* Lock all the rt group metadata inode ILOCKs and wait for intents. */
+int
xchk_rtgroup_lock(
+ struct xfs_scrub *sc,
struct xchk_rt *sr,
unsigned int rtglock_flags)
{
- xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+ int error = 0;
+
+ ASSERT(sr->rtg != NULL);
+
+ /*
+ * If we're /only/ locking the rtbitmap in shared mode, then we're
+ * obviously not trying to compare records in two metadata inodes.
+ * There's no need to drain intents here because the caller (most
+ * likely the rgsuper scanner) doesn't need that level of consistency.
+ */
+ if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+ sr->rtlock_flags = rtglock_flags;
+ return 0;
+ }
+
+ do {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+
+ /*
+ * If we've grabbed a non-metadata file for scrubbing, we
+ * assume that holding its ILOCK will suffice to coordinate
+ * with any rt intent chains involving this inode.
+ */
+ if (sc->ip && !xfs_is_internal_inode(sc->ip))
+ break;
+
+ /*
+ * Decide if the rt group is quiet enough for all metadata to
+ * be consistent with each other. Regular file IO doesn't get
+ * to lock all the rt inodes at the same time, which means that
+ * there could be other threads in the middle of processing a
+ * chain of deferred ops.
+ *
+ * We just locked all the metadata inodes for this rt group;
+ * now take a look to see if there are any intents in progress.
+ * If there are, drop the rt group inode locks and wait for the
+ * intents to drain. Since we hold the rt group inode locks
+ * for the duration of the scrub, this is the only time we have
+ * to sample the intents counter; any threads increasing it
+ * after this point can't possibly be in the middle of a chain
+ * of rt metadata updates.
+ *
+ * Obviously, this should be slanted against scrub and in favor
+ * of runtime threads.
+ */
+ if (!xfs_group_intent_busy(rtg_group(sr->rtg)))
+ break;
+
+ xfs_rtgroup_unlock(sr->rtg, rtglock_flags);
+
+ if (!(sc->flags & XCHK_FSGATES_DRAIN))
+ return -ECHRNG;
+ error = xfs_group_intent_drain(rtg_group(sr->rtg));
+ if (error) {
+ if (error == -ERESTARTSYS)
+ error = -EINTR;
+ return error;
+ }
+ } while (1);
+
sr->rtlock_flags = rtglock_flags;
+
+ if (xfs_has_rtrmapbt(sc->mp) && (rtglock_flags & XFS_RTGLOCK_RMAP))
+ sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->tp, sr->rtg);
+
+ if (xfs_has_rtreflink(sc->mp) && (rtglock_flags & XFS_RTGLOCK_REFCOUNT))
+ sr->refc_cur = xfs_rtrefcountbt_init_cursor(sc->tp, sr->rtg);
+
+ return 0;
+}
+
+/*
+ * Free all the btree cursors and other incore data relating to the realtime
+ * group. This has to be done /before/ committing (or cancelling) the scrub
+ * transaction.
+ */
+void
+xchk_rtgroup_btcur_free(
+ struct xchk_rt *sr)
+{
+ if (sr->rmap_cur)
+ xfs_btree_del_cursor(sr->rmap_cur, XFS_BTREE_ERROR);
+ if (sr->refc_cur)
+ xfs_btree_del_cursor(sr->refc_cur, XFS_BTREE_ERROR);
+
+ sr->refc_cur = NULL;
+ sr->rmap_cur = NULL;
}
/*
* Unlock the realtime group. This must be done /after/ committing (or
* cancelling) the scrub transaction.
*/
-static void
+void
xchk_rtgroup_unlock(
struct xchk_rt *sr)
{
@@ -772,11 +866,11 @@ xchk_trans_cancel(
sc->tp = NULL;
}
-int
+void
xchk_trans_alloc_empty(
struct xfs_scrub *sc)
{
- return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+ sc->tp = xfs_trans_alloc_empty(sc->mp);
}
/*
@@ -798,7 +892,8 @@ xchk_trans_alloc(
return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
resblks, 0, 0, &sc->tp);
- return xchk_trans_alloc_empty(sc);
+ xchk_trans_alloc_empty(sc);
+ return 0;
}
/* Set us up with a transaction and an empty context. */
@@ -812,6 +907,14 @@ xchk_setup_fs(
return xchk_trans_alloc(sc, resblks);
}
+/* Set us up with a transaction and an empty context to repair rt metadata. */
+int
+xchk_setup_rt(
+ struct xfs_scrub *sc)
+{
+ return xchk_trans_alloc(sc, xrep_calc_rtgroup_resblks(sc));
+}
+
/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
@@ -1379,7 +1482,7 @@ xchk_fsgates_enable(
trace_xchk_fsgates_enable(sc, scrub_fsgates);
if (scrub_fsgates & XCHK_FSGATES_DRAIN)
- xfs_drain_wait_enable();
+ xfs_defer_drain_wait_enable();
if (scrub_fsgates & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_enable();
@@ -1573,3 +1676,63 @@ xchk_inode_rootdir_inum(const struct xfs_inode *ip)
return mp->m_metadirip->i_ino;
return mp->m_rootip->i_ino;
}
+
+static int
+xchk_meta_btree_count_blocks(
+ struct xfs_scrub *sc,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
+{
+ struct xfs_btree_cur *cur;
+ int error;
+
+ if (!sc->sr.rtg) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ switch (sc->ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, sc->sr.rtg);
+ break;
+ case XFS_METAFILE_RTREFCOUNT:
+ cur = xfs_rtrefcountbt_init_cursor(sc->tp, sc->sr.rtg);
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ error = xfs_btree_count_blocks(cur, count);
+ xfs_btree_del_cursor(cur, error);
+ if (!error) {
+ *nextents = 0;
+ (*count)--; /* don't count the btree iroot */
+ }
+ return error;
+}
+
+/* Count the blocks used by a file, even if it's a metadata inode. */
+int
+xchk_inode_count_blocks(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
+{
+ struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
+
+ if (!ifp) {
+ *nextents = 0;
+ *count = 0;
+ return 0;
+ }
+
+ if (ifp->if_format == XFS_DINODE_FMT_META_BTREE) {
+ ASSERT(whichfork == XFS_DATA_FORK);
+ return xchk_meta_btree_count_blocks(sc, nextents, count);
+ }
+
+ return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents,
+ count);
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 9ff3cafd8679..ddbc065c798c 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -7,7 +7,7 @@
#define __XFS_SCRUB_COMMON_H__
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
-int xchk_trans_alloc_empty(struct xfs_scrub *sc);
+void xchk_trans_alloc_empty(struct xfs_scrub *sc);
void xchk_trans_cancel(struct xfs_scrub *sc);
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
@@ -63,6 +63,7 @@ static inline int xchk_setup_nothing(struct xfs_scrub *sc)
/* Setup functions */
int xchk_setup_agheader(struct xfs_scrub *sc);
int xchk_setup_fs(struct xfs_scrub *sc);
+int xchk_setup_rt(struct xfs_scrub *sc);
int xchk_setup_ag_allocbt(struct xfs_scrub *sc);
int xchk_setup_ag_iallocbt(struct xfs_scrub *sc);
int xchk_setup_ag_rmapbt(struct xfs_scrub *sc);
@@ -80,10 +81,14 @@ int xchk_setup_metapath(struct xfs_scrub *sc);
int xchk_setup_rtbitmap(struct xfs_scrub *sc);
int xchk_setup_rtsummary(struct xfs_scrub *sc);
int xchk_setup_rgsuperblock(struct xfs_scrub *sc);
+int xchk_setup_rtrmapbt(struct xfs_scrub *sc);
+int xchk_setup_rtrefcountbt(struct xfs_scrub *sc);
#else
# define xchk_setup_rtbitmap xchk_setup_nothing
# define xchk_setup_rtsummary xchk_setup_nothing
# define xchk_setup_rgsuperblock xchk_setup_nothing
+# define xchk_setup_rtrmapbt xchk_setup_nothing
+# define xchk_setup_rtrefcountbt xchk_setup_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
@@ -125,7 +130,9 @@ xchk_ag_init_existing(
#ifdef CONFIG_XFS_RT
/* All the locks we need to check an rtgroup. */
-#define XCHK_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP)
+#define XCHK_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
+ XFS_RTGLOCK_RMAP | \
+ XFS_RTGLOCK_REFCOUNT)
int xchk_rtgroup_init(struct xfs_scrub *sc, xfs_rgnumber_t rgno,
struct xchk_rt *sr);
@@ -141,12 +148,17 @@ xchk_rtgroup_init_existing(
return error == -ENOENT ? -EFSCORRUPTED : error;
}
-void xchk_rtgroup_lock(struct xchk_rt *sr, unsigned int rtglock_flags);
+int xchk_rtgroup_lock(struct xfs_scrub *sc, struct xchk_rt *sr,
+ unsigned int rtglock_flags);
+void xchk_rtgroup_unlock(struct xchk_rt *sr);
+void xchk_rtgroup_btcur_free(struct xchk_rt *sr);
void xchk_rtgroup_free(struct xfs_scrub *sc, struct xchk_rt *sr);
#else
# define xchk_rtgroup_init(sc, rgno, sr) (-EFSCORRUPTED)
# define xchk_rtgroup_init_existing(sc, rgno, sr) (-EFSCORRUPTED)
-# define xchk_rtgroup_lock(sc, lockflags) do { } while (0)
+# define xchk_rtgroup_lock(sc, sr, lockflags) (-EFSCORRUPTED)
+# define xchk_rtgroup_unlock(sr) do { } while (0)
+# define xchk_rtgroup_btcur_free(sr) do { } while (0)
# define xchk_rtgroup_free(sc, sr) do { } while (0)
#endif /* CONFIG_XFS_RT */
@@ -212,7 +224,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
bool xchk_dir_looks_zapped(struct xfs_inode *dp);
bool xchk_pptr_looks_zapped(struct xfs_inode *ip);
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/* Decide if a repair is required. */
static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
{
@@ -232,10 +243,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc)
return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED);
}
-#else
-# define xchk_needs_repair(sc) (false)
-# define xchk_could_repair(sc) (false)
-#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
@@ -257,6 +264,12 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
(sc)->mp->m_super->s_id, \
(sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \
##__VA_ARGS__)
+#define xchk_xfile_rtgroup_descr(sc, fmt, ...) \
+ kasprintf(XCHK_GFP_FLAGS, "XFS (%s): rtgroup 0x%x " fmt, \
+ (sc)->mp->m_super->s_id, \
+ (sc)->sr.rtg ? \
+ rtg_rgno((sc)->sr.rtg) : (sc)->sm->sm_agno, \
+ ##__VA_ARGS__)
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
@@ -274,6 +287,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
bool *inuse);
+int xchk_inode_count_blocks(struct xfs_scrub *sc, int whichfork,
+ xfs_extnum_t *nextents, xfs_filblks_t *count);
bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip);
bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip);
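[Taken together, the comments above pin down a strict ordering for the new rt scrub context: btree cursors must be torn down before the scrub transaction commits or cancels, and the rtgroup ILOCKs are dropped only after. A minimal sketch of a scrubber body honoring that ordering; the examine step is hypothetical:]

STATIC int
xchk_rtgroup_thing(
	struct xfs_scrub	*sc)
{
	int			error;

	error = xchk_rtgroup_init_existing(sc, sc->sm->sm_agno, &sc->sr);
	if (error)
		return error;

	/* take ILOCKs, drain intents, set up rmap/refcount cursors */
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		goto out_free;

	/* ... examine metadata via sc->sr.rmap_cur / sc->sr.refc_cur ... */

	/* cursors die before the transaction is committed or cancelled */
	xchk_rtgroup_btcur_free(&sc->sr);
out_free:
	/* unlock and release the group afterwards */
	xchk_rtgroup_free(sc, &sc->sr);
	return error;
}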
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index 5b6194cef3e5..38a246b8bf11 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -26,6 +26,9 @@
#include "xfs_errortag.h"
#include "xfs_icache.h"
#include "xfs_refcount_btree.h"
+#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -34,6 +37,7 @@
#include "scrub/bitmap.h"
#include "scrub/off_bitmap.h"
#include "scrub/fsb_bitmap.h"
+#include "scrub/rtb_bitmap.h"
#include "scrub/reap.h"
/*
@@ -61,7 +65,10 @@ struct xrep_cow {
struct xoff_bitmap bad_fileoffs;
/* Bitmap of fsblocks that were removed from the CoW fork. */
- struct xfsb_bitmap old_cowfork_fsblocks;
+ union {
+ struct xfsb_bitmap old_cowfork_fsblocks;
+ struct xrtb_bitmap old_cowfork_rtblocks;
+ };
/* CoW fork mappings used to scan for bad CoW staging extents. */
struct xfs_bmbt_irec irec;
@@ -145,8 +152,7 @@ xrep_cow_mark_shared_staging(
xrep_cow_trim_refcount(xc, &rrec, rec);
return xrep_cow_mark_file_range(xc,
- xfs_agbno_to_fsb(to_perag(cur->bc_group),
- rrec.rc_startblock),
+ xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock),
rrec.rc_blockcount);
}
@@ -177,9 +183,8 @@ xrep_cow_mark_missing_staging(
if (xc->next_bno >= rrec.rc_startblock)
goto next;
-
error = xrep_cow_mark_file_range(xc,
- xfs_agbno_to_fsb(to_perag(cur->bc_group), xc->next_bno),
+ xfs_gbno_to_fsb(cur->bc_group, xc->next_bno),
rrec.rc_startblock - xc->next_bno);
if (error)
return error;
@@ -222,8 +227,7 @@ xrep_cow_mark_missing_staging_rmap(
}
return xrep_cow_mark_file_range(xc,
- xfs_agbno_to_fsb(to_perag(cur->bc_group), rec_bno),
- rec_len);
+ xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len);
}
/*
@@ -311,6 +315,92 @@ out_pag:
}
/*
+ * Find any part of the CoW fork mapping that isn't a single-owner CoW staging
+ * extent and mark the corresponding part of the file range in the bitmap.
+ */
+STATIC int
+xrep_cow_find_bad_rt(
+ struct xrep_cow *xc)
+{
+ struct xfs_refcount_irec rc_low = { 0 };
+ struct xfs_refcount_irec rc_high = { 0 };
+ struct xfs_rmap_irec rm_low = { 0 };
+ struct xfs_rmap_irec rm_high = { 0 };
+ struct xfs_scrub *sc = xc->sc;
+ struct xfs_rtgroup *rtg;
+ int error = 0;
+
+ xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock);
+
+ rtg = xfs_rtgroup_get(sc->mp,
+ xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock));
+ if (!rtg)
+ return -EFSCORRUPTED;
+
+ error = xrep_rtgroup_init(sc, rtg, &sc->sr,
+ XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT);
+ if (error)
+ goto out_rtg;
+
+ /* Mark any CoW fork extents that are shared. */
+ rc_low.rc_startblock = xc->irec_startbno;
+ rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
+ rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED;
+ error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
+ xrep_cow_mark_shared_staging, xc);
+ if (error)
+ goto out_sr;
+
+ /* Make sure there are CoW staging extents for the whole mapping. */
+ rc_low.rc_startblock = xc->irec_startbno;
+ rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
+ rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW;
+ xc->next_bno = xc->irec_startbno;
+ error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high,
+ xrep_cow_mark_missing_staging, xc);
+ if (error)
+ goto out_sr;
+
+ if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
+ error = xrep_cow_mark_file_range(xc,
+ xfs_rgbno_to_rtb(rtg, xc->next_bno),
+ xc->irec_startbno + xc->irec.br_blockcount -
+ xc->next_bno);
+ if (error)
+ goto out_sr;
+ }
+
+ /* Mark any area that has an rmap that isn't a COW staging extent. */
+ rm_low.rm_startblock = xc->irec_startbno;
+ memset(&rm_high, 0xFF, sizeof(rm_high));
+ rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1;
+ error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high,
+ xrep_cow_mark_missing_staging_rmap, xc);
+ if (error)
+ goto out_sr;
+
+ /*
+ * If userspace is forcing us to rebuild the CoW fork or someone
+ * turned on the debugging knob, replace everything in the
+ * CoW fork and then scan for staging extents in the refcountbt.
+ */
+ if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) {
+ error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock,
+ xc->irec.br_blockcount);
+ if (error)
+ goto out_sr;
+ }
+
+out_sr:
+ xchk_rtgroup_btcur_free(&sc->sr);
+ xchk_rtgroup_free(sc, &sc->sr);
+out_rtg:
+ xfs_rtgroup_put(rtg);
+ return error;
+}
+
+/*
* Allocate a replacement CoW staging extent of up to the given number of
* blocks, and fill out the mapping.
*/
@@ -343,7 +433,7 @@ xrep_cow_alloc(
if (args.fsbno == NULLFSBLOCK)
return -ENOSPC;
- xfs_refcount_alloc_cow_extent(sc->tp, args.fsbno, args.len);
+ xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len);
repl->fsbno = args.fsbno;
repl->len = args.len;
@@ -351,6 +441,32 @@ xrep_cow_alloc(
}
/*
+ * Allocate a replacement rt CoW staging extent of up to the given number of
+ * blocks, and fill out the mapping.
+ */
+STATIC int
+xrep_cow_alloc_rt(
+ struct xfs_scrub *sc,
+ xfs_extlen_t maxlen,
+ struct xrep_cow_extent *repl)
+{
+ xfs_rtxlen_t maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen);
+ int error;
+
+ error = xfs_trans_reserve_more(sc->tp, 0, maxrtx);
+ if (error)
+ return error;
+
+ error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false,
+ false, &repl->fsbno, &repl->len);
+ if (error)
+ return error;
+
+ xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len);
+ return 0;
+}
+
+/*
* Look up the current CoW fork mapping so that we only allocate enough to
* replace a single mapping. If we don't find a mapping that covers the start
* of the file range, or we find a delalloc or written extent, something is
@@ -467,7 +583,10 @@ xrep_cow_replace_range(
*/
alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN,
nextoff - startoff);
- error = xrep_cow_alloc(sc, alloc_len, &repl);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ error = xrep_cow_alloc_rt(sc, alloc_len, &repl);
+ else
+ error = xrep_cow_alloc(sc, alloc_len, &repl);
if (error)
return error;
@@ -483,8 +602,12 @@ xrep_cow_replace_range(
return error;
/* Note the old CoW staging extents; we'll reap them all later. */
- error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, got.br_startblock,
- repl.len);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks,
+ got.br_startblock, repl.len);
+ else
+ error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks,
+ got.br_startblock, repl.len);
if (error)
return error;
@@ -540,8 +663,16 @@ xrep_bmap_cow(
if (!ifp)
return 0;
- /* realtime files aren't supported yet */
- if (XFS_IS_REALTIME_INODE(sc->ip))
+ /*
+ * Realtime files with large extent sizes are not supported because
+ * we could encounter an CoW mapping that has been partially written
+ * out *and* requires replacement, and there's no solution to that.
+ */
+ if (xfs_inode_has_bigrtalloc(sc->ip))
+ return -EOPNOTSUPP;
+
+ /* Metadata inodes aren't supposed to have data on the rt volume. */
+ if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip))
return -EOPNOTSUPP;
/*
@@ -562,7 +693,10 @@ xrep_bmap_cow(
xc->sc = sc;
xoff_bitmap_init(&xc->bad_fileoffs);
- xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ xrtb_bitmap_init(&xc->old_cowfork_rtblocks);
+ else
+ xfsb_bitmap_init(&xc->old_cowfork_fsblocks);
for_each_xfs_iext(ifp, &icur, &xc->irec) {
if (xchk_should_terminate(sc, &error))
@@ -585,7 +719,10 @@ xrep_bmap_cow(
if (xfs_bmap_is_written_extent(&xc->irec))
continue;
- error = xrep_cow_find_bad(xc);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ error = xrep_cow_find_bad_rt(xc);
+ else
+ error = xrep_cow_find_bad(xc);
if (error)
goto out_bitmap;
}
@@ -600,13 +737,20 @@ xrep_bmap_cow(
* by the refcount btree, not the inode, so it is correct to treat them
* like inode metadata.
*/
- error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
- &XFS_RMAP_OINFO_COW);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks,
+ &XFS_RMAP_OINFO_COW);
+ else
+ error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks,
+ &XFS_RMAP_OINFO_COW);
if (error)
goto out_bitmap;
out_bitmap:
- xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
+ if (XFS_IS_REALTIME_INODE(sc->ip))
+ xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks);
+ else
+ xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
xoff_bitmap_destroy(&xc->bad_fileoffs);
kfree(xc);
return error;
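[One structural note on the changes above: old_cowfork_fsblocks and old_cowfork_rtblocks overlay each other in a union, which is safe only because every init/set/reap/destroy site branches on the same XFS_IS_REALTIME_INODE(sc->ip) predicate, so exactly one bitmap is ever live per repair. A reduced sketch of that invariant, with demo types standing in for the real bitmaps:]

struct demo_repair {
	int	isrt;			/* fixed before the union is touched */
	union {
		long	fsb_map;	/* data device blocks */
		long	rtb_map;	/* realtime blocks */
	};
};

static void
demo_init(struct demo_repair *dr, int isrt)
{
	dr->isrt = isrt;
	if (dr->isrt)
		dr->rtb_map = 0;	/* stands in for xrtb_bitmap_init() */
	else
		dr->fsb_map = 0;	/* stands in for xfsb_bitmap_init() */
}

/* every later user must take the same branch as demo_init() */
static void
demo_destroy(struct demo_repair *dr)
{
	if (dr->isrt)
		dr->rtb_map = 0;	/* stands in for xrtb_bitmap_destroy() */
	else
		dr->fsb_map = 0;	/* stands in for xfsb_bitmap_destroy() */
}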
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 249313882108..8d3b550990b5 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -1289,9 +1289,7 @@ xrep_dir_scan_dirtree(
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
bool flush;
@@ -1317,9 +1315,7 @@ xrep_dir_scan_dirtree(
if (error)
break;
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
if (xchk_should_terminate(sc, &error))
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 4a50f8e00040..cebd0d526926 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -123,7 +123,7 @@ xchk_fsfreeze(
{
int error;
- error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsfreeze(sc, error);
return error;
}
@@ -135,7 +135,7 @@ xchk_fsthaw(
int error;
/* This should always succeed, we have a kernel freeze */
- error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsthaw(sc, error);
return error;
}
@@ -237,7 +237,8 @@ xchk_setup_fscounters(
return error;
}
- return xchk_trans_alloc_empty(sc);
+ xchk_trans_alloc_empty(sc);
+ return 0;
}
/*
@@ -261,7 +262,7 @@ xchk_fscount_btreeblks(
struct xchk_fscounters *fsc,
xfs_agnumber_t agno)
{
- xfs_extlen_t blocks;
+ xfs_filblks_t blocks;
int error;
error = xchk_ag_init_existing(sc, agno, &sc->sa);
@@ -350,7 +351,7 @@ retry:
* The global incore space reservation is taken from the incore
* counters, so leave that out of the computation.
*/
- fsc->fdblocks -= mp->m_resblks_avail;
+ fsc->fdblocks -= mp->m_free[XC_FREE_BLOCKS].res_avail;
/*
* Delayed allocation reservations are taken out of the incore counters
@@ -413,7 +414,13 @@ xchk_fscount_count_frextents(
fsc->frextents = 0;
fsc->frextents_delayed = 0;
- if (!xfs_has_realtime(mp))
+
+ /*
+ * Don't bother verifying and repairing the fs counters for zoned file
+ * systems as they don't track an on-disk frextents count, and the
+ * in-memory percpu counter also includes reservations.
+ */
+ if (!xfs_has_realtime(mp) || xfs_has_zoned(mp))
return 0;
while ((rtg = xfs_rtgroup_next(mp, rtg))) {
@@ -513,8 +520,8 @@ xchk_fscounters(
/* Snapshot the percpu counters. */
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
- frextents = percpu_counter_sum(&mp->m_frextents);
+ fdblocks = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS);
+ frextents = xfs_sum_freecounter_raw(mp, XC_FREE_RTEXTENTS);
/* No negative values, please! */
if (icount < 0 || ifree < 0)
@@ -589,15 +596,17 @@ xchk_fscounters(
try_again = true;
}
- if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks)) {
+ if (!xchk_fscount_within_range(sc, fdblocks,
+ &mp->m_free[XC_FREE_BLOCKS].count, fsc->fdblocks)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
else
try_again = true;
}
- if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+ if (!xfs_has_zoned(mp) &&
+ !xchk_fscount_within_range(sc, frextents,
+ &mp->m_free[XC_FREE_RTEXTENTS].count,
fsc->frextents - fsc->frextents_delayed)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
index cda13447a373..f0d2b04644e4 100644
--- a/fs/xfs/scrub/fscounters_repair.c
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -64,7 +64,7 @@ xrep_fscounters(
percpu_counter_set(&mp->m_icount, fsc->icount);
percpu_counter_set(&mp->m_ifree, fsc->ifree);
- percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+ xfs_set_freecounter(mp, XC_FREE_BLOCKS, fsc->fdblocks);
/*
* Online repair is only supported on v5 file systems, which require
@@ -74,10 +74,12 @@ xrep_fscounters(
* track of the delalloc reservations separately, as they are
* subtracted from m_frextents, but not included in sb_frextents.
*/
- percpu_counter_set(&mp->m_frextents,
- fsc->frextents - fsc->frextents_delayed);
- if (!xfs_has_rtgroups(mp))
- mp->m_sb.sb_frextents = fsc->frextents;
+ if (!xfs_has_zoned(mp)) {
+ xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
+ fsc->frextents - fsc->frextents_delayed);
+ if (!xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents = fsc->frextents;
+ }
return 0;
}
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index ce86bdad37fa..3c0f25098b69 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -71,7 +71,8 @@
/* Map our scrub type to a sick mask and a set of health update functions. */
enum xchk_health_group {
- XHG_FS = 1,
+ XHG_NONE = 1,
+ XHG_FS,
XHG_AG,
XHG_INO,
XHG_RTGROUP,
@@ -83,6 +84,7 @@ struct xchk_health_map {
};
static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
+ [XFS_SCRUB_TYPE_PROBE] = { XHG_NONE, 0 },
[XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB },
[XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF },
[XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL },
@@ -112,6 +114,8 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE },
[XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH },
[XFS_SCRUB_TYPE_RGSUPER] = { XHG_RTGROUP, XFS_SICK_RG_SUPER },
+ [XFS_SCRUB_TYPE_RTRMAPBT] = { XHG_RTGROUP, XFS_SICK_RG_RMAPBT },
+ [XFS_SCRUB_TYPE_RTREFCBT] = { XHG_RTGROUP, XFS_SICK_RG_REFCNTBT },
};
/* Return the health status mask for this scrub type. */
@@ -133,7 +137,7 @@ xchk_mark_healthy_if_clean(
{
if (!(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
XFS_SCRUB_OFLAG_XCORRUPT)))
- sc->sick_mask |= mask;
+ sc->healthy_mask |= mask;
}
/*
@@ -189,6 +193,7 @@ xchk_update_health(
{
struct xfs_perag *pag;
struct xfs_rtgroup *rtg;
+ unsigned int mask = sc->sick_mask;
bool bad;
/*
@@ -203,50 +208,56 @@ xchk_update_health(
return;
}
- if (!sc->sick_mask)
- return;
-
bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
XFS_SCRUB_OFLAG_XCORRUPT));
+ if (!bad)
+ mask |= sc->healthy_mask;
switch (type_to_health_flag[sc->sm->sm_type].group) {
+ case XHG_NONE:
+ break;
case XHG_AG:
+ if (!mask)
+ return;
pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_group_mark_corrupt(pag_group(pag), sc->sick_mask);
+ xfs_group_mark_corrupt(pag_group(pag), mask);
else
- xfs_group_mark_healthy(pag_group(pag), sc->sick_mask);
+ xfs_group_mark_healthy(pag_group(pag), mask);
xfs_perag_put(pag);
break;
case XHG_INO:
if (!sc->ip)
return;
- if (bad) {
- unsigned int mask = sc->sick_mask;
-
- /*
- * If we're coming in for repairs then we don't want
- * sickness flags to propagate to the incore health
- * status if the inode gets inactivated before we can
- * fix it.
- */
- if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
- mask |= XFS_SICK_INO_FORGET;
+ /*
+ * If we're coming in for repairs then we don't want sickness
+ * flags to propagate to the incore health status if the inode
+ * gets inactivated before we can fix it.
+ */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ mask |= XFS_SICK_INO_FORGET;
+ if (!mask)
+ return;
+ if (bad)
xfs_inode_mark_corrupt(sc->ip, mask);
- } else
- xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
+ else
+ xfs_inode_mark_healthy(sc->ip, mask);
break;
case XHG_FS:
+ if (!mask)
+ return;
if (bad)
- xfs_fs_mark_corrupt(sc->mp, sc->sick_mask);
+ xfs_fs_mark_corrupt(sc->mp, mask);
else
- xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
+ xfs_fs_mark_healthy(sc->mp, mask);
break;
case XHG_RTGROUP:
+ if (!mask)
+ return;
rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_group_mark_corrupt(rtg_group(rtg), sc->sick_mask);
+ xfs_group_mark_corrupt(rtg_group(rtg), mask);
else
- xfs_group_mark_healthy(rtg_group(rtg), sc->sick_mask);
+ xfs_group_mark_healthy(rtg_group(rtg), mask);
xfs_rtgroup_put(rtg);
break;
default:
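[The restructured update logic above boils down to one mask computation: start from sick_mask, fold in healthy_mask when the scan came back clean, and (for inode scans headed into repair) OR in XFS_SICK_INO_FORGET so sickness state cannot outlive an inactivated inode; an empty mask then short-circuits the group lookup entirely. A hedged restatement as a standalone helper, not part of the patch:]

/* Compute the health bits to update once a scan finishes. */
static unsigned int
scan_update_mask(
	unsigned int	sick_mask,	/* bits this scrubber owns */
	unsigned int	healthy_mask,	/* bits queued by clean cross-checks */
	int		bad)		/* scan found (X)CORRUPT? */
{
	unsigned int	mask = sick_mask;

	if (!bad)
		mask |= healthy_mask;
	return mask;
}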
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index abad54c3621d..4dc7c83dc08a 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -650,8 +650,8 @@ xchk_iallocbt_xref_rmap_btreeblks(
struct xfs_scrub *sc)
{
xfs_filblks_t blocks;
- xfs_extlen_t inobt_blocks = 0;
- xfs_extlen_t finobt_blocks = 0;
+ xfs_filblks_t inobt_blocks = 0;
+ xfs_filblks_t finobt_blocks = 0;
int error;
if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 25ee66e7649d..bb3f475b6353 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -260,12 +260,7 @@ xchk_inode_extsize(
xchk_ino_set_warning(sc, ino);
}
-/*
- * Validate di_cowextsize hint.
- *
- * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
- * These functions must be kept in sync with each other.
- */
+/* Validate di_cowextsize hint. */
STATIC void
xchk_inode_cowextsize(
struct xfs_scrub *sc,
@@ -276,12 +271,32 @@ xchk_inode_cowextsize(
uint64_t flags2)
{
xfs_failaddr_t fa;
+ uint32_t value = be32_to_cpu(dip->di_cowextsize);
+
+ /*
+ * The used block counter for rtrmap is checked and repaired elsewhere.
+ */
+ if (xfs_has_zoned(sc->mp) &&
+ dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP))
+ return;
- fa = xfs_inode_validate_cowextsize(sc->mp,
- be32_to_cpu(dip->di_cowextsize), mode, flags,
- flags2);
+ fa = xfs_inode_validate_cowextsize(sc->mp, value, mode, flags, flags2);
if (fa)
xchk_ino_set_corrupt(sc, ino);
+
+ /*
+ * XFS allows a sysadmin to change the rt extent size when adding a rt
+ * section to a filesystem after formatting. If there are any
+ * directories with cowextsize and rtinherit set, the hint could become
+ * misaligned with the new rextsize. The verifier doesn't check this,
+ * because we allow rtinherit directories even without an rt device.
+ * Flag this as an administrative warning since we will clean this up
+ * eventually.
+ */
+ if ((flags & XFS_DIFLAG_RTINHERIT) &&
+ (flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+ value % sc->mp->m_sb.sb_rextsize > 0)
+ xchk_ino_set_warning(sc, ino);
}
/* Make sure the di_flags make sense for the inode. */
@@ -360,8 +375,9 @@ xchk_inode_flags2(
if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode))
goto bad;
- /* realtime and reflink make no sense, currently */
- if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
+ /* realtime and reflink don't always go together */
+ if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK) &&
+ !xfs_has_rtreflink(mp))
goto bad;
/* no bigtime iflag without the bigtime feature */
@@ -502,6 +518,10 @@ xchk_dinode(
if (!S_ISREG(mode) && !S_ISDIR(mode))
xchk_ino_set_corrupt(sc, ino);
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (!S_ISREG(mode))
+ xchk_ino_set_corrupt(sc, ino);
+ break;
case XFS_DINODE_FMT_UUID:
default:
xchk_ino_set_corrupt(sc, ino);
@@ -686,15 +706,13 @@ xchk_inode_xref_bmap(
return;
/* Walk all the extents to check nextents/naextents/nblocks. */
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
- &nextents, &count);
+ error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
if (!xchk_should_check_xref(sc, &error, NULL))
return;
if (nextents < xfs_dfork_data_extents(dip))
xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
- &nextents, &acount);
+ error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, &acount);
if (!xchk_should_check_xref(sc, &error, NULL))
return;
if (nextents != xfs_dfork_attr_extents(dip))
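[A quick worked example of the new cowextsize alignment warning, with hypothetical numbers: a directory carrying RTINHERIT and COWEXTSIZE gets a di_cowextsize hint of 8 blocks on a filesystem formatted with sb_rextsize = 4, so 8 % 4 == 0 and nothing fires. If the rt section is later re-added with sb_rextsize = 5, then 8 % 5 == 3, the value % sb_rextsize > 0 test trips, and the inode draws an administrative warning rather than a corruption flag, matching the comment's promise that this gets cleaned up eventually.]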
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 5a58ddd27bd2..a90a011c7e5f 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -38,6 +38,9 @@
#include "xfs_log_priv.h"
#include "xfs_health.h"
#include "xfs_symlink_remote.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtrefcount_btree.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -562,8 +565,6 @@ xrep_dinode_flags(
flags2 |= XFS_DIFLAG2_REFLINK;
else
flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
- if (flags & XFS_DIFLAG_REALTIME)
- flags2 &= ~XFS_DIFLAG2_REFLINK;
if (!xfs_has_bigtime(mp))
flags2 &= ~XFS_DIFLAG2_BIGTIME;
if (!xfs_has_large_extent_counts(mp))
@@ -709,7 +710,9 @@ xrep_dinode_extsize_hints(
XFS_DIFLAG_EXTSZINHERIT);
}
- if (dip->di_version < 3)
+ if (dip->di_version < 3 ||
+ (xfs_has_zoned(sc->mp) &&
+ dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)))
return;
fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
@@ -773,17 +776,71 @@ xrep_dinode_count_ag_rmaps(
return error;
}
+/* Count extents and blocks for an inode given an rt rmap. */
+STATIC int
+xrep_dinode_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_inode *ri = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(ri->sc, &error))
+ return error;
+
+ /* We only care about this inode. */
+ if (rec->rm_owner != ri->sc->sm->sm_ino)
+ return 0;
+
+ if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
+ return -EFSCORRUPTED;
+
+ ri->rt_blocks += rec->rm_blockcount;
+ ri->rt_extents++;
+ return 0;
+}
+
+/* Count extents and blocks for an inode from all realtime rmap data. */
+STATIC int
+xrep_dinode_count_rtgroup_rmaps(
+ struct xrep_inode *ri,
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_scrub *sc = ri->sc;
+ int error;
+
+ error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
+ ri);
+ xchk_rtgroup_btcur_free(&sc->sr);
+ xchk_rtgroup_free(sc, &sc->sr);
+ return error;
+}
+
/* Count extents and blocks for a given inode from all rmap data. */
STATIC int
xrep_dinode_count_rmaps(
struct xrep_inode *ri)
{
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
int error;
- if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
+ if (!xfs_has_rmapbt(ri->sc->mp))
return -EOPNOTSUPP;
+ while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
+ error = xrep_dinode_count_rtgroup_rmaps(ri, rtg);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+
while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
error = xrep_dinode_count_ag_rmaps(ri, pag);
if (error) {
@@ -888,6 +945,85 @@ xrep_dinode_bad_bmbt_fork(
return false;
}
+/* Return true if this rmap-format ifork looks like garbage. */
+STATIC bool
+xrep_dinode_bad_rtrmapbt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size)
+{
+ struct xfs_rtrmap_root *dfp;
+ unsigned int nrecs;
+ unsigned int level;
+
+ if (dfork_size < sizeof(struct xfs_rtrmap_root))
+ return true;
+
+ dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
+
+ if (level > sc->mp->m_rtrmap_maxlevels)
+ return true;
+ if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size)
+ return true;
+ if (level > 0 && nrecs == 0)
+ return true;
+
+ return false;
+}
+
+/* Return true if this refcount-format ifork looks like garbage. */
+STATIC bool
+xrep_dinode_bad_rtrefcountbt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size)
+{
+ struct xfs_rtrefcount_root *dfp;
+ unsigned int nrecs;
+ unsigned int level;
+
+ if (dfork_size < sizeof(struct xfs_rtrefcount_root))
+ return true;
+
+ dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
+
+ if (level > sc->mp->m_rtrefc_maxlevels)
+ return true;
+ if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size)
+ return true;
+ if (level > 0 && nrecs == 0)
+ return true;
+
+ return false;
+}
+
+/* Check a metadata-btree fork. */
+STATIC bool
+xrep_dinode_bad_metabt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size,
+ int whichfork)
+{
+ if (whichfork != XFS_DATA_FORK)
+ return true;
+
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size);
+ case XFS_METAFILE_RTREFCOUNT:
+ return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size);
+ default:
+ return true;
+ }
+
+ return false;
+}
+
/*
* Check the data fork for things that will fail the ifork verifiers or the
* ifork formatters.
@@ -921,9 +1057,17 @@ xrep_dinode_check_dfork(
return true;
break;
case S_IFREG:
- if (fmt == XFS_DINODE_FMT_LOCAL)
+ switch (fmt) {
+ case XFS_DINODE_FMT_LOCAL:
return true;
- fallthrough;
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_META_BTREE:
+ break;
+ default:
+ return true;
+ }
+ break;
case S_IFLNK:
case S_IFDIR:
switch (fmt) {
@@ -968,6 +1112,11 @@ xrep_dinode_check_dfork(
XFS_DATA_FORK))
return true;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
+ XFS_DATA_FORK))
+ return true;
+ break;
default:
return true;
}
@@ -1088,6 +1237,11 @@ xrep_dinode_check_afork(
XFS_ATTR_FORK))
return true;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
+ XFS_ATTR_FORK))
+ return true;
+ break;
default:
return true;
}
@@ -1135,6 +1289,8 @@ xrep_dinode_ensure_forkoff(
uint16_t mode)
{
struct xfs_bmdr_block *bmdr;
+ struct xfs_rtrmap_root *rmdr;
+ struct xfs_rtrefcount_root *rcdr;
struct xfs_scrub *sc = ri->sc;
xfs_extnum_t attr_extents, data_extents;
size_t bmdr_minsz = xfs_bmdr_space_calc(1);
@@ -1241,6 +1397,21 @@ xrep_dinode_ensure_forkoff(
bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
+ break;
+ case XFS_METAFILE_RTREFCOUNT:
+ rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr);
+ break;
+ default:
+ dfork_min = 0;
+ break;
+ }
+ break;
default:
dfork_min = 0;
break;
@@ -1389,8 +1560,7 @@ xrep_dinode_core(
/* Read the inode cluster buffer. */
error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
- ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
- NULL);
+ ri->imap.im_blkno, ri->imap.im_len, 0, &bp, NULL);
if (error)
return error;
@@ -1500,8 +1670,7 @@ xrep_inode_blockcounts(
trace_xrep_inode_blockcounts(sc);
/* Set data fork counters from the data fork mappings. */
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
- &nextents, &count);
+ error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
if (error)
return error;
if (xfs_is_reflink_inode(sc->ip)) {
@@ -1525,8 +1694,8 @@ xrep_inode_blockcounts(
/* Set attr fork counters from the attr fork mappings. */
ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
if (ifp) {
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
- &nextents, &acount);
+ error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
+ &acount);
if (error)
return error;
if (count >= sc->mp->m_sb.sb_dblocks)
@@ -1664,10 +1833,6 @@ xrep_inode_flags(
/* DAX only applies to files and dirs. */
if (!(S_ISREG(mode) || S_ISDIR(mode)))
sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
-
- /* No reflink files on the realtime device. */
- if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
- sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
}
/*
@@ -1783,6 +1948,20 @@ xrep_inode_pptr(
sizeof(struct xfs_attr_sf_hdr), true);
}
+/* Fix COW extent size hint problems. */
+STATIC void
+xrep_inode_cowextsize(
+ struct xfs_scrub *sc)
+{
+ /* Fix misaligned CoW extent size hints on a directory. */
+ if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+ sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) {
+ sc->ip->i_cowextsize = 0;
+ sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+ }
+}
+
/* Fix any irregularities in an inode that the verifiers don't catch. */
STATIC int
xrep_inode_problems(
@@ -1806,6 +1985,7 @@ xrep_inode_problems(
if (S_ISDIR(VFS_I(sc->ip)->i_mode))
xrep_inode_dir_size(sc);
xrep_inode_extsize(sc);
+ xrep_inode_cowextsize(sc);
trace_xrep_inode_fixed(sc);
xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index b78db6513465..14939d7de349 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -21,6 +21,8 @@
#include "xfs_trans_space.h"
#include "xfs_attr.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtrefcount_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -171,23 +173,13 @@ static int
xchk_setup_metapath_quotadir(
struct xfs_scrub *sc)
{
- struct xfs_trans *tp;
- struct xfs_inode *dp = NULL;
- int error;
-
- error = xfs_trans_alloc_empty(sc->mp, &tp);
- if (error)
- return error;
+ struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
- error = xfs_dqinode_load_parent(tp, &dp);
- xfs_trans_cancel(tp);
- if (error)
- return error;
+ if (!qi || !qi->qi_dirip)
+ return -ENOENT;
- error = xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
- kasprintf(GFP_KERNEL, "quota"), dp);
- xfs_irele(dp);
- return error;
+ return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
+ kstrdup("quota", GFP_KERNEL), qi->qi_dirip);
}
/* Scan a quota inode under the /quota directory. */
@@ -196,37 +188,31 @@ xchk_setup_metapath_dqinode(
struct xfs_scrub *sc,
xfs_dqtype_t type)
{
- struct xfs_trans *tp = NULL;
- struct xfs_inode *dp = NULL;
+ struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
struct xfs_inode *ip = NULL;
- const char *path;
- int error;
- error = xfs_trans_alloc_empty(sc->mp, &tp);
- if (error)
- return error;
-
- error = xfs_dqinode_load_parent(tp, &dp);
- if (error)
- goto out_cancel;
-
- error = xfs_dqinode_load(tp, dp, type, &ip);
- if (error)
- goto out_dp;
-
- xfs_trans_cancel(tp);
- tp = NULL;
+ if (!qi)
+ return -ENOENT;
- path = kasprintf(GFP_KERNEL, "%s", xfs_dqinode_path(type));
- error = xchk_setup_metapath_scan(sc, dp, path, ip);
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ ip = qi->qi_uquotaip;
+ break;
+ case XFS_DQTYPE_GROUP:
+ ip = qi->qi_gquotaip;
+ break;
+ case XFS_DQTYPE_PROJ:
+ ip = qi->qi_pquotaip;
+ break;
+ default:
+ ASSERT(0);
+ return -EINVAL;
+ }
+ if (!ip)
+ return -ENOENT;
- xfs_irele(ip);
-out_dp:
- xfs_irele(dp);
-out_cancel:
- if (tp)
- xfs_trans_cancel(tp);
- return error;
+ return xchk_setup_metapath_scan(sc, qi->qi_dirip,
+ kstrdup(xfs_dqinode_path(type), GFP_KERNEL), ip);
}
#else
# define xchk_setup_metapath_quotadir(...) (-ENOENT)
@@ -262,6 +248,10 @@ xchk_setup_metapath(
return xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_GROUP);
case XFS_SCRUB_METAPATH_PRJQUOTA:
return xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_PROJ);
+ case XFS_SCRUB_METAPATH_RTRMAPBT:
+ return xchk_setup_metapath_rtginode(sc, XFS_RTGI_RMAP);
+ case XFS_SCRUB_METAPATH_RTREFCOUNTBT:
+ return xchk_setup_metapath_rtginode(sc, XFS_RTGI_REFCOUNT);
default:
return -ENOENT;
}
@@ -328,9 +318,7 @@ xchk_metapath(
return 0;
}
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
error = xchk_metapath_ilock_both(mpath);
if (error)
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 70af27d98734..1588ce971cb8 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -19,6 +19,8 @@
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
+#include "xfs_metafile.h"
+#include "xfs_quota.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -60,7 +62,7 @@ xrep_newbt_estimate_slack(
free = sc->sa.pag->pagf_freeblks;
sz = xfs_ag_block_count(sc->mp, pag_agno(sc->sa.pag));
} else {
- free = percpu_counter_sum(&sc->mp->m_fdblocks);
+ free = xfs_sum_freecounter_raw(sc->mp, XC_FREE_BLOCKS);
sz = sc->mp->m_sb.sb_dblocks;
}
@@ -121,6 +123,43 @@ xrep_newbt_init_inode(
}
/*
+ * Initialize accounting resources for staging a new metadata inode btree.
+ * If the metadata file has a space reservation, the caller must adjust that
+ * reservation when committing the new ondisk btree.
+ */
+int
+xrep_newbt_init_metadir_inode(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_ifork *ifp;
+
+ ASSERT(xfs_is_metadir_inode(sc->ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
+
+ ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
+ if (!ifp)
+ return -ENOMEM;
+
+ /*
+ * Allocate new metadir btree blocks with XFS_AG_RESV_NONE because the
+ * inode metadata space reservations can only account allocated space
+	 * to i_nblocks.  We do not want to change the inode core fields
+ * until we're ready to commit the new tree, so we allocate the blocks
+ * as if they were regular file blocks. This exposes us to a higher
+ * risk of the repair being cancelled due to ENOSPC.
+ */
+ xrep_newbt_init_ag(xnr, sc, &oinfo,
+ XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
+ XFS_AG_RESV_NONE);
+ xnr->ifake.if_fork = ifp;
+ xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, XFS_DATA_FORK);
+ return 0;
+}
+
+/*
* Initialize accounting resources for staging a new btree. Callers are
* expected to add their own reservations (and clean them up) manually.
*/
@@ -224,6 +263,7 @@ xrep_newbt_alloc_ag_blocks(
int error = 0;
ASSERT(sc->sa.pag != NULL);
+ ASSERT(xnr->resv != XFS_AG_RESV_METAFILE);
while (nr_blocks > 0) {
struct xfs_alloc_arg args = {
@@ -297,6 +337,8 @@ xrep_newbt_alloc_file_blocks(
struct xfs_mount *mp = sc->mp;
int error = 0;
+ ASSERT(xnr->resv != XFS_AG_RESV_METAFILE);
+
while (nr_blocks > 0) {
struct xfs_alloc_arg args = {
.tp = sc->tp,
diff --git a/fs/xfs/scrub/newbt.h b/fs/xfs/scrub/newbt.h
index 3d804d31af24..5ce785599287 100644
--- a/fs/xfs/scrub/newbt.h
+++ b/fs/xfs/scrub/newbt.h
@@ -63,6 +63,7 @@ void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_init_metadir_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
int xrep_newbt_add_extent(struct xrep_newbt *xnr, struct xfs_perag *pag,
xfs_agblock_t agbno, xfs_extlen_t len);
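
Since xrep_newbt_init_metadir_inode() fakes up an ifork and allocates from the AGs as if the blocks were regular file blocks, a caller is expected to pair it with block allocation and an eventual commit that fixes up i_nblocks and the metafile reservation. A minimal hedged sketch of that sequence follows; the function name, the block count, and the elided commit step are illustrative assumptions, not part of this patch.

/*
 * Illustrative sketch only: stage blocks for a new metadir btree.  The
 * commit/cancel steps are elided because they depend on the btree type.
 */
STATIC int
xrep_example_stage_metadir_btree(
	struct xfs_scrub	*sc,
	uint64_t		nr_blocks)
{
	struct xrep_newbt	xnr;
	int			error;

	error = xrep_newbt_init_metadir_inode(&xnr, sc);
	if (error)
		return error;

	/* Grab blocks with XFS_AG_RESV_NONE, as described above. */
	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);

	/* ...load records, then commit the new root or cancel... */
	return error;
}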
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 4a47d0aabf73..26721fab5cab 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -555,9 +555,7 @@ xchk_nlinks_collect(
* do not take sb_internal.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
if (S_ISDIR(VFS_I(ip)->i_mode))
@@ -880,9 +878,7 @@ xchk_nlinks_compare(
* inactivation workqueue.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
/*
* Use the inobt to walk all allocated inodes to compare the link
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index 4ebdee095428..6ef2ee9c3814 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -340,9 +340,7 @@ xrep_nlinks(
* We can only push the inactivation workqueues with an empty
* transaction.
*/
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
xchk_iscan_iter_finish(&xnc->compare_iscan);
xchk_iscan_teardown(&xnc->compare_iscan);
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index c287c755f2c5..9c12cb844231 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -153,8 +153,7 @@ xrep_orphanage_create(
/* Try to find the orphanage directory. */
inode_lock_nested(root_inode, I_MUTEX_PARENT);
- orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
- strlen(ORPHANAGE));
+ orphanage_dentry = lookup_noperm(&QSTR(ORPHANAGE), root_dentry);
if (IS_ERR(orphanage_dentry)) {
error = PTR_ERR(orphanage_dentry);
goto out_unlock_root;
@@ -167,10 +166,11 @@ xrep_orphanage_create(
* directory to control access to a file we put in here.
*/
if (d_really_is_negative(orphanage_dentry)) {
- error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
- 0750);
- if (error)
- goto out_dput_orphanage;
+ orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
+ orphanage_dentry, 0750);
+ error = PTR_ERR(orphanage_dentry);
+ if (IS_ERR(orphanage_dentry))
+ goto out_unlock_root;
}
/* Not a directory? Bail out. */
@@ -444,7 +444,7 @@ xrep_adoption_check_dcache(
if (!d_orphanage)
return 0;
- d_child = d_hash_and_lookup(d_orphanage, &qname);
+ d_child = try_lookup_noperm(&qname, d_orphanage);
if (d_child) {
trace_xrep_adoption_check_child(sc->mp, d_child);
@@ -481,7 +481,7 @@ xrep_adoption_zap_dcache(
if (!d_orphanage)
return;
- d_child = d_hash_and_lookup(d_orphanage, &qname);
+ d_child = try_lookup_noperm(&qname, d_orphanage);
while (d_child != NULL) {
trace_xrep_adoption_invalidate_child(sc->mp, d_child);
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
index 31bfe10be22a..2949feda6271 100644
--- a/fs/xfs/scrub/parent_repair.c
+++ b/fs/xfs/scrub/parent_repair.c
@@ -569,9 +569,7 @@ xrep_parent_scan_dirtree(
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) {
bool flush;
@@ -597,9 +595,7 @@ xrep_parent_scan_dirtree(
if (error)
break;
- error = xchk_trans_alloc_empty(sc);
- if (error)
- break;
+ xchk_trans_alloc_empty(sc);
}
if (xchk_should_terminate(sc, &error))
@@ -1099,9 +1095,7 @@ xrep_parent_flush_xattrs(
xrep_tempfile_iounlock(rp->sc);
/* Recreate the empty transaction and relock the inode. */
- error = xchk_trans_alloc_empty(rp->sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(rp->sc);
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 183d531875ea..58d6d4ed2853 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -212,12 +212,18 @@ xchk_quota_item(
if (mp->m_sb.sb_dblocks < dq->q_blk.count)
xchk_fblock_set_warning(sc, XFS_DATA_FORK,
offset);
+ if (mp->m_sb.sb_rblocks < dq->q_rtb.count)
+ xchk_fblock_set_warning(sc, XFS_DATA_FORK,
+ offset);
} else {
if (mp->m_sb.sb_dblocks < dq->q_blk.count)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
offset);
+ if (mp->m_sb.sb_rblocks < dq->q_rtb.count)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
+ offset);
}
- if (dq->q_ino.count > fs_icount || dq->q_rtb.count > mp->m_sb.sb_rblocks)
+ if (dq->q_ino.count > fs_icount)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
/*
diff --git a/fs/xfs/scrub/quota_repair.c b/fs/xfs/scrub/quota_repair.c
index cd51f10f2920..8f4c8d41f308 100644
--- a/fs/xfs/scrub/quota_repair.c
+++ b/fs/xfs/scrub/quota_repair.c
@@ -233,7 +233,7 @@ xrep_quota_item(
rqi->need_quotacheck = true;
dirty = true;
}
- if (dq->q_rtb.count > mp->m_sb.sb_rblocks) {
+ if (!xfs_has_reflink(mp) && dq->q_rtb.count > mp->m_sb.sb_rblocks) {
dq->q_rtb.reserved -= dq->q_rtb.count;
dq->q_rtb.reserved += mp->m_sb.sb_rblocks;
dq->q_rtb.count = mp->m_sb.sb_rblocks;
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
index dc4033b91e44..e4105aaafe84 100644
--- a/fs/xfs/scrub/quotacheck.c
+++ b/fs/xfs/scrub/quotacheck.c
@@ -505,9 +505,7 @@ xqcheck_collect_counts(
* transactions do not take sb_internal.
*/
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) {
error = xqcheck_collect_inode(xqc, ip);
diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c
index 709356dc6256..9a4ef823c5a7 100644
--- a/fs/xfs/scrub/rcbag_btree.c
+++ b/fs/xfs/scrub/rcbag_btree.c
@@ -47,29 +47,20 @@ rcbagbt_init_rec_from_cur(
bag_rec->rbg_refcount = bag_irec->rbg_refcount;
}
-STATIC int64_t
-rcbagbt_key_diff(
+STATIC int
+rcbagbt_cmp_key_with_cur(
struct xfs_btree_cur *cur,
const union xfs_btree_key *key)
{
struct rcbag_rec *rec = (struct rcbag_rec *)&cur->bc_rec;
const struct rcbag_key *kp = (const struct rcbag_key *)key;
- if (kp->rbg_startblock > rec->rbg_startblock)
- return 1;
- if (kp->rbg_startblock < rec->rbg_startblock)
- return -1;
-
- if (kp->rbg_blockcount > rec->rbg_blockcount)
- return 1;
- if (kp->rbg_blockcount < rec->rbg_blockcount)
- return -1;
-
- return 0;
+ return cmp_int(kp->rbg_startblock, rec->rbg_startblock) ?:
+ cmp_int(kp->rbg_blockcount, rec->rbg_blockcount);
}
-STATIC int64_t
-rcbagbt_diff_two_keys(
+STATIC int
+rcbagbt_cmp_two_keys(
struct xfs_btree_cur *cur,
const union xfs_btree_key *k1,
const union xfs_btree_key *k2,
@@ -80,17 +71,8 @@ rcbagbt_diff_two_keys(
ASSERT(mask == NULL);
- if (kp1->rbg_startblock > kp2->rbg_startblock)
- return 1;
- if (kp1->rbg_startblock < kp2->rbg_startblock)
- return -1;
-
- if (kp1->rbg_blockcount > kp2->rbg_blockcount)
- return 1;
- if (kp1->rbg_blockcount < kp2->rbg_blockcount)
- return -1;
-
- return 0;
+ return cmp_int(kp1->rbg_startblock, kp2->rbg_startblock) ?:
+ cmp_int(kp1->rbg_blockcount, kp2->rbg_blockcount);
}
STATIC int
@@ -201,9 +183,9 @@ static const struct xfs_btree_ops rcbagbt_mem_ops = {
.init_key_from_rec = rcbagbt_init_key_from_rec,
.init_rec_from_cur = rcbagbt_init_rec_from_cur,
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
- .key_diff = rcbagbt_key_diff,
+ .cmp_key_with_cur = rcbagbt_cmp_key_with_cur,
.buf_ops = &rcbagbt_mem_buf_ops,
- .diff_two_keys = rcbagbt_diff_two_keys,
+ .cmp_two_keys = rcbagbt_cmp_two_keys,
.keys_inorder = rcbagbt_keys_inorder,
.recs_inorder = rcbagbt_recs_inorder,
};
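
The rcbagbt conversion above relies on cmp_int() returning -1, 0, or 1 and on the GNU ?: extension to chain comparisons, so two keys order lexicographically: the blockcount comparison matters only when the startblocks tie. A standalone sketch of the idiom; the macro body below is written out for illustration and assumes the usual (l > r) - (l < r) definition rather than quoting the kernel's header.

/* Assumed definition, for illustration: three-way compare yielding -1/0/1. */
#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

static int
example_cmp_two_keys(
	const struct rcbag_key	*k1,
	const struct rcbag_key	*k2)
{
	/* Falls through to blockcount only when the startblocks are equal. */
	return cmp_int(k1->rbg_startblock, k2->rbg_startblock) ?:
	       cmp_int(k1->rbg_blockcount, k2->rbg_blockcount);
}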
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index 08230952053b..8703897c0a9c 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -33,6 +33,9 @@
#include "xfs_attr.h"
#include "xfs_attr_remote.h"
#include "xfs_defer.h"
+#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -40,6 +43,7 @@
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/fsb_bitmap.h"
+#include "scrub/rtb_bitmap.h"
#include "scrub/reap.h"
/*
@@ -310,7 +314,7 @@ xreap_agextent_binval(
}
out:
- trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
+ trace_xreap_agextent_binval(pag_group(sc->sa.pag), agbno, *aglenp);
}
/*
@@ -369,7 +373,8 @@ xreap_agextent_select(
out_found:
*aglenp = len;
- trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
+ trace_xreap_agextent_select(pag_group(sc->sa.pag), agbno, len,
+ *crosslinked);
out_cur:
xfs_btree_del_cursor(cur, error);
return error;
@@ -390,6 +395,8 @@ xreap_agextent_iter(
xfs_fsblock_t fsbno;
int error = 0;
+ ASSERT(rs->resv != XFS_AG_RESV_METAFILE);
+
fsbno = xfs_agbno_to_fsb(sc->sa.pag, agbno);
/*
@@ -406,7 +413,8 @@ xreap_agextent_iter(
* to run xfs_repair.
*/
if (crosslinked) {
- trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
+ trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag), agbno,
+ *aglenp);
rs->force_roll = true;
@@ -416,7 +424,8 @@ xreap_agextent_iter(
* records from the refcountbt, which will remove the
* rmap record as well.
*/
- xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
+ xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
+ *aglenp);
return 0;
}
@@ -424,7 +433,7 @@ xreap_agextent_iter(
*aglenp, rs->oinfo);
}
- trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
+ trace_xreap_dispose_free_extent(pag_group(sc->sa.pag), agbno, *aglenp);
/*
* Invalidate as many buffers as we can, starting at agbno. If this
@@ -448,7 +457,7 @@ xreap_agextent_iter(
if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
ASSERT(rs->resv == XFS_AG_RESV_NONE);
- xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
+ xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp);
error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
if (error)
@@ -675,6 +684,266 @@ xrep_reap_fsblocks(
return 0;
}
+#ifdef CONFIG_XFS_RT
+/*
+ * Figure out the longest run of blocks that we can dispose of with a single
+ * call. Cross-linked blocks should have their reverse mappings removed, but
+ * single-owner extents can be freed. Units are rt blocks, not rt extents.
+ */
+STATIC int
+xreap_rgextent_select(
+ struct xreap_state *rs,
+ xfs_rgblock_t rgbno,
+ xfs_rgblock_t rgbno_next,
+ bool *crosslinked,
+ xfs_extlen_t *rglenp)
+{
+ struct xfs_scrub *sc = rs->sc;
+ struct xfs_btree_cur *cur;
+ xfs_rgblock_t bno = rgbno + 1;
+ xfs_extlen_t len = 1;
+ int error;
+
+ /*
+ * Determine if there are any other rmap records covering the first
+ * block of this extent. If so, the block is crosslinked.
+ */
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, sc->sr.rtg);
+ error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo,
+ crosslinked);
+ if (error)
+ goto out_cur;
+
+ /*
+ * Figure out how many of the subsequent blocks have the same crosslink
+ * status.
+ */
+ while (bno < rgbno_next) {
+ bool also_crosslinked;
+
+ error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
+ &also_crosslinked);
+ if (error)
+ goto out_cur;
+
+ if (*crosslinked != also_crosslinked)
+ break;
+
+ len++;
+ bno++;
+ }
+
+ *rglenp = len;
+ trace_xreap_agextent_select(rtg_group(sc->sr.rtg), rgbno, len,
+ *crosslinked);
+out_cur:
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/*
+ * Dispose of as much of the beginning of this rtgroup extent as possible.
+ * The number of blocks disposed of will be returned in @rglenp.
+ */
+STATIC int
+xreap_rgextent_iter(
+ struct xreap_state *rs,
+ xfs_rgblock_t rgbno,
+ xfs_extlen_t *rglenp,
+ bool crosslinked)
+{
+ struct xfs_scrub *sc = rs->sc;
+ xfs_rtblock_t rtbno;
+ int error;
+
+ /*
+ * The only caller so far is CoW fork repair, so we only know how to
+ * unlink or free CoW staging extents. Here we don't have to worry
+ * about invalidating buffers!
+ */
+ if (rs->oinfo != &XFS_RMAP_OINFO_COW) {
+ ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW);
+ return -EFSCORRUPTED;
+ }
+ ASSERT(rs->resv == XFS_AG_RESV_NONE);
+
+ rtbno = xfs_rgbno_to_rtb(sc->sr.rtg, rgbno);
+
+ /*
+ * If there are other rmappings, this block is cross linked and must
+ * not be freed. Remove the forward and reverse mapping and move on.
+ */
+ if (crosslinked) {
+ trace_xreap_dispose_unmap_extent(rtg_group(sc->sr.rtg), rgbno,
+ *rglenp);
+
+ xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
+ rs->deferred++;
+ return 0;
+ }
+
+ trace_xreap_dispose_free_extent(rtg_group(sc->sr.rtg), rgbno, *rglenp);
+
+ /*
+ * The CoW staging extent is not crosslinked. Use deferred work items
+ * to remove the refcountbt records (which removes the rmap records)
+ * and free the extent. We're not worried about the system going down
+ * here because log recovery walks the refcount btree to clean out the
+ * CoW staging extents.
+ */
+ xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
+ error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL,
+ rs->resv,
+ XFS_FREE_EXTENT_REALTIME |
+ XFS_FREE_EXTENT_SKIP_DISCARD);
+ if (error)
+ return error;
+
+ rs->deferred++;
+ return 0;
+}
+
+#define XREAP_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
+ XFS_RTGLOCK_RMAP | \
+ XFS_RTGLOCK_REFCOUNT)
+
+/*
+ * Break an rt file metadata extent into sub-extents by fate (crosslinked, not
+ * crosslinked), and dispose of each sub-extent separately. The extent must
+ * be aligned to a realtime extent.
+ */
+STATIC int
+xreap_rtmeta_extent(
+ uint64_t rtbno,
+ uint64_t len,
+ void *priv)
+{
+ struct xreap_state *rs = priv;
+ struct xfs_scrub *sc = rs->sc;
+ xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno);
+ xfs_rgblock_t rgbno_next = rgbno + len;
+ int error = 0;
+
+ ASSERT(sc->ip != NULL);
+ ASSERT(!sc->sr.rtg);
+
+ /*
+ * We're reaping blocks after repairing file metadata, which means that
+ * we have to init the xchk_rt structure ourselves.
+ */
+ sc->sr.rtg = xfs_rtgroup_get(sc->mp, xfs_rtb_to_rgno(sc->mp, rtbno));
+ if (!sc->sr.rtg)
+ return -EFSCORRUPTED;
+
+ xfs_rtgroup_lock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
+
+ while (rgbno < rgbno_next) {
+ xfs_extlen_t rglen;
+ bool crosslinked;
+
+ error = xreap_rgextent_select(rs, rgbno, rgbno_next,
+ &crosslinked, &rglen);
+ if (error)
+ goto out_unlock;
+
+ error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked);
+ if (error)
+ goto out_unlock;
+
+ if (xreap_want_defer_finish(rs)) {
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ goto out_unlock;
+ xreap_defer_finish_reset(rs);
+ } else if (xreap_want_roll(rs)) {
+ error = xfs_trans_roll_inode(&sc->tp, sc->ip);
+ if (error)
+ goto out_unlock;
+ xreap_reset(rs);
+ }
+
+ rgbno += rglen;
+ }
+
+out_unlock:
+ xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
+ xfs_rtgroup_put(sc->sr.rtg);
+ sc->sr.rtg = NULL;
+ return error;
+}
+
+/*
+ * Dispose of every block of every rt metadata extent in the bitmap.
+ * Do not use this to dispose of the mappings in an ondisk inode fork.
+ */
+int
+xrep_reap_rtblocks(
+ struct xfs_scrub *sc,
+ struct xrtb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xreap_state rs = {
+ .sc = sc,
+ .oinfo = oinfo,
+ .resv = XFS_AG_RESV_NONE,
+ };
+ int error;
+
+ ASSERT(xfs_has_rmapbt(sc->mp));
+ ASSERT(sc->ip != NULL);
+
+ error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
+ if (error)
+ return error;
+
+ if (xreap_dirty(&rs))
+ return xrep_defer_finish(sc);
+
+ return 0;
+}
+#endif /* CONFIG_XFS_RT */
+
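For context, here is a hedged sketch of how a rt repair might hand dead CoW staging blocks to xrep_reap_rtblocks() using the xrtb_bitmap wrappers added later in this patch; the wrapper function and the block range are illustrative only.

/* Illustrative only: reap a doomed CoW staging range on the rt device. */
STATIC int
xrep_example_reap_rt_cow(
	struct xfs_scrub	*sc,
	xfs_rtblock_t		rtbno,
	xfs_filblks_t		len)
{
	struct xrtb_bitmap	doomed;
	int			error;

	xrtb_bitmap_init(&doomed);
	error = xrtb_bitmap_set(&doomed, rtbno, len);
	if (!error)
		error = xrep_reap_rtblocks(sc, &doomed, &XFS_RMAP_OINFO_COW);
	xrtb_bitmap_destroy(&doomed);
	return error;
}
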
+/*
+ * Dispose of every block of an old metadata btree that used to be rooted in a
+ * metadata directory file.
+ */
+int
+xrep_reap_metadir_fsblocks(
+ struct xfs_scrub *sc,
+ struct xfsb_bitmap *bitmap)
+{
+ /*
+ * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
+ * blocks are no longer mapped by the inode, and inode metadata space
+	 * reservations can only account freed space to i_nblocks.
+ */
+ struct xfs_owner_info oinfo;
+ struct xreap_state rs = {
+ .sc = sc,
+ .oinfo = &oinfo,
+ .resv = XFS_AG_RESV_NONE,
+ };
+ int error;
+
+ ASSERT(xfs_has_rmapbt(sc->mp));
+ ASSERT(sc->ip != NULL);
+ ASSERT(xfs_is_metadir_inode(sc->ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
+
+ error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
+ if (error)
+ return error;
+
+ if (xreap_dirty(&rs)) {
+ error = xrep_defer_finish(sc);
+ if (error)
+ return error;
+ }
+
+ return xrep_reset_metafile_resv(sc);
+}
+
/*
* Metadata files are not supposed to share blocks with anything else.
* If blocks are shared, we remove the reverse mapping (thus reducing the
@@ -729,7 +998,8 @@ xreap_bmapi_select(
}
imap->br_blockcount = len;
- trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
+ trace_xreap_bmapi_select(pag_group(sc->sa.pag), agbno, len,
+ *crosslinked);
out_cur:
xfs_btree_del_cursor(cur, error);
return error;
@@ -868,7 +1138,8 @@ xreap_bmapi_binval(
}
out:
- trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
+ trace_xreap_bmapi_binval(pag_group(sc->sa.pag), agbno,
+ imap->br_blockcount);
return 0;
}
@@ -895,7 +1166,7 @@ xrep_reap_bmapi_iter(
* anybody else who thinks they own the block, even though that
* runs the risk of stale buffer warnings in the future.
*/
- trace_xreap_dispose_unmap_extent(sc->sa.pag,
+ trace_xreap_dispose_unmap_extent(pag_group(sc->sa.pag),
XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
imap->br_blockcount);
@@ -918,7 +1189,7 @@ xrep_reap_bmapi_iter(
* by a block starting before the first block of the extent but overlap
* anyway.
*/
- trace_xreap_dispose_free_extent(sc->sa.pag,
+ trace_xreap_dispose_free_extent(pag_group(sc->sa.pag),
XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
imap->br_blockcount);
diff --git a/fs/xfs/scrub/reap.h b/fs/xfs/scrub/reap.h
index 3f2f1775e29d..4c8f62701fb3 100644
--- a/fs/xfs/scrub/reap.h
+++ b/fs/xfs/scrub/reap.h
@@ -14,6 +14,15 @@ int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
int xrep_reap_fsblocks(struct xfs_scrub *sc, struct xfsb_bitmap *bitmap,
const struct xfs_owner_info *oinfo);
int xrep_reap_ifork(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork);
+int xrep_reap_metadir_fsblocks(struct xfs_scrub *sc,
+ struct xfsb_bitmap *bitmap);
+
+#ifdef CONFIG_XFS_RT
+int xrep_reap_rtblocks(struct xfs_scrub *sc, struct xrtb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo);
+#else
+# define xrep_reap_rtblocks(...) (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_RT */
/* Buffer cache scan context. */
struct xrep_bufscan {
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 2b6be75e9424..d46528023015 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -421,7 +421,7 @@ xchk_refcount_mergeable(
if (r1->rc_refcount != r2->rc_refcount)
return false;
if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount >
- MAXREFCEXTLEN)
+ XFS_REFC_LEN_MAX)
return false;
return true;
@@ -491,7 +491,7 @@ xchk_refcount_xref_rmap(
struct xfs_scrub *sc,
xfs_filblks_t cow_blocks)
{
- xfs_extlen_t refcbt_blocks = 0;
+ xfs_filblks_t refcbt_blocks = 0;
xfs_filblks_t blocks;
int error;
diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c
index 4e572b81c986..9c8cb5332da0 100644
--- a/fs/xfs/scrub/refcount_repair.c
+++ b/fs/xfs/scrub/refcount_repair.c
@@ -183,13 +183,13 @@ xrep_refc_stash(
if (xchk_should_terminate(sc, &error))
return error;
- irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);
+ irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);
error = xrep_refc_check_ext(rr->sc, &irec);
if (error)
return error;
- trace_xrep_refc_found(sc->sa.pag, &irec);
+ trace_xrep_refc_found(pag_group(sc->sa.pag), &irec);
return xfarray_append(rr->refcount_records, &irec);
}
@@ -422,7 +422,7 @@ xrep_refc_find_refcounts(
/*
* Set up a bag to store all the rmap records that we're tracking to
* generate a reference count record. If the size of the bag exceeds
- * MAXREFCOUNT, we clamp rc_refcount.
+ * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
*/
error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
if (error)
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 91c8bc055a4f..d00c18954a26 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -37,6 +37,13 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_dir2.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtalloc.h"
+#include "xfs_metafile.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_zone_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -62,6 +69,7 @@ xrep_attempt(
trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
xchk_ag_btcur_free(&sc->sa);
+ xchk_rtgroup_btcur_free(&sc->sr);
/* Repair whatever's broken. */
ASSERT(sc->ops->repair);
@@ -378,6 +386,41 @@ xrep_calc_ag_resblks(
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
+#ifdef CONFIG_XFS_RT
+/*
+ * Figure out how many blocks to reserve for an rtgroup repair.  We calculate
+ * the worst-case estimate of the number of blocks we'd need to rebuild one of
+ * any type of per-rtgroup btree.
+ */
+xfs_extlen_t
+xrep_calc_rtgroup_resblks(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_scrub_metadata *sm = sc->sm;
+ uint64_t usedlen;
+ xfs_extlen_t rmapbt_sz = 0;
+
+ if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ return 0;
+ if (!xfs_has_rtgroups(mp)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ usedlen = xfs_rtbxlen_to_blen(mp, xfs_rtgroup_extents(mp, sm->sm_agno));
+ ASSERT(usedlen <= XFS_MAX_RGBLOCKS);
+
+ if (xfs_has_rmapbt(mp))
+ rmapbt_sz = xfs_rtrmapbt_calc_size(mp, usedlen);
+
+ trace_xrep_calc_rtgroup_resblks_btsize(mp, sm->sm_agno, usedlen,
+ rmapbt_sz);
+
+ return rmapbt_sz;
+}
+#endif /* CONFIG_XFS_RT */
+
/*
* Reconstructing per-AG Btrees
*
@@ -954,6 +997,27 @@ xrep_ag_init(
}
#ifdef CONFIG_XFS_RT
+/* Initialize all the btree cursors for an RT repair. */
+void
+xrep_rtgroup_btcur_init(
+ struct xfs_scrub *sc,
+ struct xchk_rt *sr)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ ASSERT(sr->rtg != NULL);
+
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RTRMAPBT &&
+ (sr->rtlock_flags & XFS_RTGLOCK_RMAP) &&
+ xfs_has_rtrmapbt(mp))
+ sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->tp, sr->rtg);
+
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RTREFCBT &&
+ (sr->rtlock_flags & XFS_RTGLOCK_REFCOUNT) &&
+ xfs_has_rtreflink(mp))
+ sr->refc_cur = xfs_rtrefcountbt_init_cursor(sc->tp, sr->rtg);
+}
+
/*
* Given a reference to a rtgroup structure, lock rtgroup btree inodes and
* create btree cursors. Must only be called to repair a regular rt file.
@@ -972,6 +1036,39 @@ xrep_rtgroup_init(
/* Grab our own passive reference from the caller's ref. */
sr->rtg = xfs_rtgroup_hold(rtg);
+ xrep_rtgroup_btcur_init(sc, sr);
+ return 0;
+}
+
+/* Ensure that all rt blocks in the given range are not marked free. */
+int
+xrep_require_rtext_inuse(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t rgbno,
+ xfs_filblks_t len)
+{
+ struct xfs_mount *mp = sc->mp;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t endrtx;
+ bool is_free = false;
+ int error = 0;
+
+ if (xfs_has_zoned(mp)) {
+ if (!xfs_zone_rgbno_is_valid(sc->sr.rtg, rgbno + len - 1))
+ return -EFSCORRUPTED;
+ return 0;
+ }
+
+ startrtx = xfs_rgbno_to_rtx(mp, rgbno);
+ endrtx = xfs_rgbno_to_rtx(mp, rgbno + len - 1);
+
+ error = xfs_rtalloc_extent_is_free(sc->sr.rtg, sc->tp, startrtx,
+ endrtx - startrtx + 1, &is_free);
+ if (error)
+ return error;
+ if (is_free)
+ return -EFSCORRUPTED;
+
return 0;
}
#endif /* CONFIG_XFS_RT */
@@ -1172,42 +1269,6 @@ xrep_setup_xfbtree(
}
/*
- * Create a dummy transaction for use in a live update hook function. This
- * function MUST NOT be called from regular repair code because the current
- * process' transaction is saved via the cookie.
- */
-int
-xrep_trans_alloc_hook_dummy(
- struct xfs_mount *mp,
- void **cookiep,
- struct xfs_trans **tpp)
-{
- int error;
-
- *cookiep = current->journal_info;
- current->journal_info = NULL;
-
- error = xfs_trans_alloc_empty(mp, tpp);
- if (!error)
- return 0;
-
- current->journal_info = *cookiep;
- *cookiep = NULL;
- return error;
-}
-
-/* Cancel a dummy transaction used by a live update hook function. */
-void
-xrep_trans_cancel_hook_dummy(
- void **cookiep,
- struct xfs_trans *tp)
-{
- xfs_trans_cancel(tp);
- current->journal_info = *cookiep;
- *cookiep = NULL;
-}
-
-/*
* See if this buffer can pass the given ->verify_struct() function.
*
* If the buffer already has ops attached and they're not the ones that were
@@ -1237,3 +1298,110 @@ xrep_buf_verify_struct(
return fa == NULL;
}
+
+/* Check the sanity of an rmap record for a metadata btree inode. */
+int
+xrep_check_ino_btree_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ /*
+ * Metadata btree inodes never have extended attributes, and all blocks
+ * should have the bmbt block flag set.
+ */
+ if ((rec->rm_flags & XFS_RMAP_ATTR_FORK) ||
+ !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ return -EFSCORRUPTED;
+
+ /* Make sure the block is within the AG. */
+ if (!xfs_verify_agbext(sc->sa.pag, rec->rm_startblock,
+ rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
+ rec->rm_blockcount, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
+/*
+ * Reset the block count of the inode being repaired, and adjust the dquot
+ * block usage to match. The inode must not have an xattr fork.
+ */
+void
+xrep_inode_set_nblocks(
+ struct xfs_scrub *sc,
+ int64_t new_blocks)
+{
+ int64_t delta =
+ new_blocks - sc->ip->i_nblocks;
+
+ sc->ip->i_nblocks = new_blocks;
+
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ if (delta != 0)
+ xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT,
+ delta);
+}
+
+/* Reset the block reservation for a metadata inode. */
+int
+xrep_reset_metafile_resv(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ int64_t delta;
+ int error;
+
+ delta = mp->m_metafile_resv_used + mp->m_metafile_resv_avail -
+ mp->m_metafile_resv_target;
+ if (delta == 0)
+ return 0;
+
+ /*
+ * Too many blocks have been reserved, transfer some from the incore
+ * reservation back to the filesystem.
+ */
+ if (delta > 0) {
+ int64_t give_back;
+
+ give_back = min_t(uint64_t, delta, mp->m_metafile_resv_avail);
+ if (give_back > 0) {
+ xfs_mod_sb_delalloc(mp, -give_back);
+ xfs_add_fdblocks(mp, give_back);
+ mp->m_metafile_resv_avail -= give_back;
+ }
+
+ return 0;
+ }
+
+ /*
+	 * Not enough reservation; try to move some blocks from the filesystem
+	 * into the metabtree reservation.
+ */
+ delta = -delta; /* delta is negative here, so invert the sign. */
+ error = xfs_dec_fdblocks(mp, delta, true);
+ while (error == -ENOSPC) {
+ delta--;
+ if (delta == 0) {
+ xfs_warn(sc->mp,
+"Insufficient free space to reset metabtree reservation after repair.");
+ return 0;
+ }
+ error = xfs_dec_fdblocks(mp, delta, true);
+ }
+ if (error)
+ return error;
+
+ xfs_mod_sb_delalloc(mp, delta);
+ mp->m_metafile_resv_avail += delta;
+ return 0;
+}
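
To make the delta arithmetic above concrete: with m_metafile_resv_used = 90, m_metafile_resv_avail = 20, and m_metafile_resv_target = 100, delta = 90 + 20 - 100 = 10, so up to 10 blocks flow back to the free space counters and resv_avail drops to 10. Conversely, with used = 90, avail = 0, and target = 100, delta = -10, so the function tries to pull 10 blocks out of fdblocks, retrying with progressively smaller amounts (and finally warning) if the filesystem is nearly full.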
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index b649da1a93eb..9c04295742c8 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -50,7 +50,9 @@ xrep_trans_commit(
struct xbitmap;
struct xagb_bitmap;
+struct xrgb_bitmap;
struct xfsb_bitmap;
+struct xrtb_bitmap;
int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags);
@@ -97,6 +99,8 @@ int xrep_setup_parent(struct xfs_scrub *sc);
int xrep_setup_nlinks(struct xfs_scrub *sc);
int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *resblks);
int xrep_setup_dirtree(struct xfs_scrub *sc);
+int xrep_setup_rtrmapbt(struct xfs_scrub *sc);
+int xrep_setup_rtrefcountbt(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
@@ -110,10 +114,18 @@ int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
#ifdef CONFIG_XFS_RT
int xrep_rtgroup_init(struct xfs_scrub *sc, struct xfs_rtgroup *rtg,
struct xchk_rt *sr, unsigned int rtglock_flags);
+void xrep_rtgroup_btcur_init(struct xfs_scrub *sc, struct xchk_rt *sr);
+int xrep_require_rtext_inuse(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_filblks_t len);
+xfs_extlen_t xrep_calc_rtgroup_resblks(struct xfs_scrub *sc);
#else
# define xrep_rtgroup_init(sc, rtg, sr, lockflags) (-ENOSYS)
+# define xrep_calc_rtgroup_resblks(sc) (0)
#endif /* CONFIG_XFS_RT */
+int xrep_check_ino_btree_mapping(struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec);
+
/* Metadata revalidators */
int xrep_revalidate_allocbt(struct xfs_scrub *sc);
@@ -147,10 +159,14 @@ int xrep_metapath(struct xfs_scrub *sc);
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
int xrep_rgsuperblock(struct xfs_scrub *sc);
+int xrep_rtrmapbt(struct xfs_scrub *sc);
+int xrep_rtrefcountbt(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtsummary xrep_notsupported
# define xrep_rgsuperblock xrep_notsupported
+# define xrep_rtrmapbt xrep_notsupported
+# define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_RT */
#ifdef CONFIG_XFS_QUOTA
@@ -164,16 +180,23 @@ int xrep_quotacheck(struct xfs_scrub *sc);
int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc);
-int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
- struct xfs_trans **tpp);
-void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
-
bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
+void xrep_inode_set_nblocks(struct xfs_scrub *sc, int64_t new_blocks);
+int xrep_reset_metafile_resv(struct xfs_scrub *sc);
#else
#define xrep_ino_dqattach(sc) (0)
-#define xrep_will_attempt(sc) (false)
+
+/*
+ * When online repair is not built into the kernel, we still want to attempt
+ * the repair so that the stub xrep_attempt below will return EOPNOTSUPP.
+ */
+static inline bool xrep_will_attempt(const struct xfs_scrub *sc)
+{
+ return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) ||
+ xchk_needs_repair(sc->sm);
+}
static inline int
xrep_attempt(
@@ -192,6 +215,8 @@ xrep_calc_ag_resblks(
return 0;
}
+#define xrep_calc_rtgroup_resblks xrep_calc_ag_resblks
+
static inline int
xrep_reset_perag_resv(
struct xfs_scrub *sc)
@@ -219,6 +244,8 @@ xrep_setup_nothing(
#define xrep_setup_nlinks xrep_setup_nothing
#define xrep_setup_dirtree xrep_setup_nothing
#define xrep_setup_metapath xrep_setup_nothing
+#define xrep_setup_rtrmapbt xrep_setup_nothing
+#define xrep_setup_rtrefcountbt xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@@ -256,6 +283,8 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x)
#define xrep_dirtree xrep_notsupported
#define xrep_metapath xrep_notsupported
#define xrep_rgsuperblock xrep_notsupported
+#define xrep_rtrmapbt xrep_notsupported
+#define xrep_rtrefcountbt xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rgb_bitmap.h b/fs/xfs/scrub/rgb_bitmap.h
new file mode 100644
index 000000000000..4c3126b66dcb
--- /dev/null
+++ b/fs/xfs/scrub/rgb_bitmap.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_RGB_BITMAP_H__
+#define __XFS_SCRUB_RGB_BITMAP_H__
+
+/* Bitmaps, but type-checked for xfs_rgblock_t */
+
+struct xrgb_bitmap {
+ struct xbitmap32 rgbitmap;
+};
+
+static inline void xrgb_bitmap_init(struct xrgb_bitmap *bitmap)
+{
+ xbitmap32_init(&bitmap->rgbitmap);
+}
+
+static inline void xrgb_bitmap_destroy(struct xrgb_bitmap *bitmap)
+{
+ xbitmap32_destroy(&bitmap->rgbitmap);
+}
+
+static inline int xrgb_bitmap_set(struct xrgb_bitmap *bitmap,
+ xfs_rgblock_t start, xfs_extlen_t len)
+{
+ return xbitmap32_set(&bitmap->rgbitmap, start, len);
+}
+
+static inline int xrgb_bitmap_walk(struct xrgb_bitmap *bitmap,
+ xbitmap32_walk_fn fn, void *priv)
+{
+ return xbitmap32_walk(&bitmap->rgbitmap, fn, priv);
+}
+
+#endif /* __XFS_SCRUB_RGB_BITMAP_H__ */
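
These wrappers add nothing at runtime; they exist so the compiler flags any mixing of xfs_rgblock_t ranges with other bitmap types. A hedged usage sketch follows; the callback body and the range are made up, and the walk callback signature assumes the xbitmap32_walk_fn convention (start, len, priv) from scrub/bitmap.h.

/* Illustrative only: record a range of rtgroup blocks and walk it back. */
static int
example_rgb_walk_fn(
	uint32_t		start,
	uint32_t		len,
	void			*priv)
{
	/* process rtgroup blocks [start, start + len) */
	return 0;
}

static int
example_rgb_bitmap_use(void)
{
	struct xrgb_bitmap	bitmap;
	int			error;

	xrgb_bitmap_init(&bitmap);
	error = xrgb_bitmap_set(&bitmap, 16, 8);
	if (!error)
		error = xrgb_bitmap_walk(&bitmap, example_rgb_walk_fn, NULL);
	xrgb_bitmap_destroy(&bitmap);
	return error;
}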
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c
index 463b3573bb76..d189732d0e24 100644
--- a/fs/xfs/scrub/rgsuper.c
+++ b/fs/xfs/scrub/rgsuper.c
@@ -13,6 +13,7 @@
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
+#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
@@ -34,6 +35,7 @@ xchk_rgsuperblock_xref(
return;
xchk_xref_is_used_rt_space(sc, xfs_rgbno_to_rtb(sc->sr.rtg, 0), 1);
+ xchk_xref_is_only_rt_owned_by(sc, 0, 1, &XFS_RMAP_OINFO_FS);
}
int
@@ -61,7 +63,9 @@ xchk_rgsuperblock(
if (!xchk_xref_process_error(sc, 0, 0, &error))
return error;
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error)
+ return error;
/*
* Since we already validated the rt superblock at mount time, we don't
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index a0a227d183d2..17d4a38d735c 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -31,6 +31,9 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_ag.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrefcount_btree.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -499,6 +502,69 @@ xrep_rmap_scan_iext(
return xrep_rmap_stash_accumulated(rf);
}
+static int
+xrep_rmap_scan_meta_btree(
+ struct xrep_rmap_ifork *rf,
+ struct xfs_inode *ip)
+{
+ struct xfs_scrub *sc = rf->rr->sc;
+ struct xfs_rtgroup *rtg = NULL;
+ struct xfs_btree_cur *cur = NULL;
+ enum xfs_rtg_inodes type;
+ int error;
+
+ if (rf->whichfork != XFS_DATA_FORK)
+ return -EFSCORRUPTED;
+
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ type = XFS_RTGI_RMAP;
+ break;
+ case XFS_METAFILE_RTREFCOUNT:
+ type = XFS_RTGI_REFCOUNT;
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ if (ip == rtg->rtg_inodes[type])
+ goto found;
+ }
+
+ /*
+ * We should never find an rt metadata btree inode that isn't
+ * associated with an rtgroup yet has ondisk blocks allocated to it.
+ */
+ if (ip->i_nblocks) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+
+found:
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg);
+ break;
+ case XFS_METAFILE_RTREFCOUNT:
+ cur = xfs_rtrefcountbt_init_cursor(sc->tp, rtg);
+ break;
+ default:
+ ASSERT(0);
+ error = -EFSCORRUPTED;
+ goto out_rtg;
+ }
+
+ error = xrep_rmap_scan_iroot_btree(rf, cur);
+ xfs_btree_del_cursor(cur, error);
+out_rtg:
+ xfs_rtgroup_rele(rtg);
+ return error;
+}
+
/* Find all the extents from a given AG in an inode fork. */
STATIC int
xrep_rmap_scan_ifork(
@@ -512,14 +578,14 @@ xrep_rmap_scan_ifork(
.whichfork = whichfork,
};
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ bool mappings_done;
int error = 0;
if (!ifp)
return 0;
- if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
- bool mappings_done;
-
+ switch (ifp->if_format) {
+ case XFS_DINODE_FMT_BTREE:
/*
* Scan the bmap btree for data device mappings. This includes
* the btree blocks themselves, even if this is a realtime
@@ -528,15 +594,18 @@ xrep_rmap_scan_ifork(
error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
if (error || mappings_done)
return error;
- } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
- return 0;
+ fallthrough;
+ case XFS_DINODE_FMT_EXTENTS:
+ /* Scan incore extent cache if this isn't a realtime file. */
+ if (xfs_ifork_is_realtime(ip, whichfork))
+ return 0;
+
+ return xrep_rmap_scan_iext(&rf, ifp);
+ case XFS_DINODE_FMT_META_BTREE:
+ return xrep_rmap_scan_meta_btree(&rf, ip);
}
- /* Scan incore extent cache if this isn't a realtime file. */
- if (xfs_ifork_is_realtime(ip, whichfork))
- return 0;
-
- return xrep_rmap_scan_iext(&rf, ifp);
+ return 0;
}
/*
@@ -882,9 +951,7 @@ end_agscan:
sa->agf_bp = NULL;
sa->agi_bp = NULL;
xchk_trans_cancel(sc);
- error = xchk_trans_alloc_empty(sc);
- if (error)
- return error;
+ xchk_trans_alloc_empty(sc);
/* Iterate all AGs for inodes rmaps. */
while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
@@ -1543,7 +1610,6 @@ xrep_rmapbt_live_update(
struct xfs_mount *mp;
struct xfs_btree_cur *mcur;
struct xfs_trans *tp;
- void *txcookie;
int error;
rr = container_of(nb, struct xrep_rmap, rhook.rmap_hook.nb);
@@ -1552,11 +1618,9 @@ xrep_rmapbt_live_update(
if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
goto out_unlock;
- trace_xrep_rmap_live_update(rr->sc->sa.pag, action, p);
+ trace_xrep_rmap_live_update(pag_group(rr->sc->sa.pag), action, p);
- error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
- if (error)
- goto out_abort;
+ tp = xfs_trans_alloc_empty(mp);
mutex_lock(&rr->lock);
mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, &rr->rmap_btree);
@@ -1570,14 +1634,13 @@ xrep_rmapbt_live_update(
if (error)
goto out_cancel;
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
+ xfs_trans_cancel(tp);
mutex_unlock(&rr->lock);
return NOTIFY_DONE;
out_cancel:
xfbtree_trans_cancel(&rr->rmap_btree, tp);
- xrep_trans_cancel_hook_dummy(&txcookie, tp);
-out_abort:
+ xfs_trans_cancel(tp);
mutex_unlock(&rr->lock);
xchk_iscan_abort(&rr->iscan);
out_unlock:
diff --git a/fs/xfs/scrub/rtb_bitmap.h b/fs/xfs/scrub/rtb_bitmap.h
new file mode 100644
index 000000000000..1313ef605511
--- /dev/null
+++ b/fs/xfs/scrub/rtb_bitmap.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_RTB_BITMAP_H__
+#define __XFS_SCRUB_RTB_BITMAP_H__
+
+/* Bitmaps, but type-checked for xfs_rtblock_t */
+
+struct xrtb_bitmap {
+ struct xbitmap64 rtbitmap;
+};
+
+static inline void xrtb_bitmap_init(struct xrtb_bitmap *bitmap)
+{
+ xbitmap64_init(&bitmap->rtbitmap);
+}
+
+static inline void xrtb_bitmap_destroy(struct xrtb_bitmap *bitmap)
+{
+ xbitmap64_destroy(&bitmap->rtbitmap);
+}
+
+static inline int xrtb_bitmap_set(struct xrtb_bitmap *bitmap,
+ xfs_rtblock_t start, xfs_filblks_t len)
+{
+ return xbitmap64_set(&bitmap->rtbitmap, start, len);
+}
+
+static inline int xrtb_bitmap_walk(struct xrtb_bitmap *bitmap,
+ xbitmap64_walk_fn fn, void *priv)
+{
+ return xbitmap64_walk(&bitmap->rtbitmap, fn, priv);
+}
+
+#endif /* __XFS_SCRUB_RTB_BITMAP_H__ */
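
These mirror the xrgb_bitmap wrappers above, but carry 64-bit xfs_rtblock_t/xfs_filblks_t quantities and wrap xbitmap64 instead of xbitmap32; the xrep_reap_rtblocks() sketch shown earlier with the reap.c changes is the intended consumer pattern.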
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 376a36fd9a9c..d5ff8609dbfb 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,17 +9,25 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
+#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bit.h"
+#include "xfs_rtgroup.h"
#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_exchmaps.h"
+#include "xfs_zone_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
+#include "scrub/tempexch.h"
#include "scrub/rtbitmap.h"
+#include "scrub/btree.h"
/* Set us up with the realtime metadata locked. */
int
@@ -30,10 +38,15 @@ xchk_setup_rtbitmap(
struct xchk_rtbitmap *rtb;
int error;
- rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS);
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
+ rtb = kzalloc(struct_size(rtb, words, xchk_rtbitmap_wordcnt(sc)),
+ XCHK_GFP_FLAGS);
if (!rtb)
return -ENOMEM;
sc->buf = rtb;
+ rtb->sc = sc;
error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
if (error)
@@ -49,8 +62,7 @@ xchk_setup_rtbitmap(
if (error)
return error;
- error = xchk_install_live_inode(sc,
- sc->sr.rtg->rtg_inodes[XFS_RTGI_BITMAP]);
+ error = xchk_install_live_inode(sc, rtg_bitmap(sc->sr.rtg));
if (error)
return error;
@@ -58,12 +70,15 @@ xchk_setup_rtbitmap(
if (error)
return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+ if (error)
+ return error;
+
/*
* Now that we've locked the rtbitmap, we can't race with growfsrt
* trying to expand the bitmap or change the size of the rt volume.
* Hence it is safe to compute and check the geometry values.
*/
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
rtb->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
@@ -73,7 +88,32 @@ xchk_setup_rtbitmap(
return 0;
}
-/* Realtime bitmap. */
+/* Per-rtgroup bitmap contents. */
+
+/* Cross-reference rtbitmap entries with other metadata. */
+STATIC void
+xchk_rtbitmap_xref(
+ struct xchk_rtbitmap *rtb,
+ xfs_rtblock_t startblock,
+ xfs_rtblock_t blockcount)
+{
+ struct xfs_scrub *sc = rtb->sc;
+ xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, startblock);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+ if (!sc->sr.rmap_cur)
+ return;
+
+ xchk_xref_has_no_rt_owner(sc, rgbno, blockcount);
+ xchk_xref_is_not_rt_shared(sc, rgbno, blockcount);
+ xchk_xref_is_not_rt_cow_staging(sc, rgbno, blockcount);
+
+ if (rtb->next_free_rgbno < rgbno)
+ xchk_xref_has_rt_owner(sc, rtb->next_free_rgbno,
+ rgbno - rtb->next_free_rgbno);
+ rtb->next_free_rgbno = rgbno + blockcount;
+}
/* Scrub a free extent record from the realtime bitmap. */
STATIC int
@@ -83,7 +123,8 @@ xchk_rtbitmap_rec(
const struct xfs_rtalloc_rec *rec,
void *priv)
{
- struct xfs_scrub *sc = priv;
+ struct xchk_rtbitmap *rtb = priv;
+ struct xfs_scrub *sc = rtb->sc;
xfs_rtblock_t startblock;
xfs_filblks_t blockcount;
@@ -92,6 +133,12 @@ xchk_rtbitmap_rec(
if (!xfs_verify_rtbext(rtg_mount(rtg), startblock, blockcount))
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+
+ xchk_rtbitmap_xref(rtb, startblock, blockcount);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return -ECANCELED;
+
return 0;
}
@@ -139,15 +186,16 @@ xchk_rtbitmap_check_extents(
return error;
}
-/* Scrub the realtime bitmap. */
+/* Scrub this group's realtime bitmap. */
int
xchk_rtbitmap(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
struct xchk_rtbitmap *rtb = sc->buf;
+ xfs_rgblock_t last_rgbno;
int error;
/* Is sb_rextents correct? */
@@ -200,10 +248,20 @@ xchk_rtbitmap(
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return error;
- error = xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtbitmap_rec, sc);
+ rtb->next_free_rgbno = 0;
+ error = xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtbitmap_rec, rtb);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
+ /*
+	 * Check that there are rmap records for all rt extents between the end of
+ * the last free extent we saw and the last possible extent in the rt
+ * group.
+ */
+ last_rgbno = rtg->rtg_extents * mp->m_sb.sb_rextsize - 1;
+ if (rtb->next_free_rgbno < last_rgbno)
+ xchk_xref_has_rt_owner(sc, rtb->next_free_rgbno,
+ last_rgbno - rtb->next_free_rgbno);
return 0;
}
@@ -215,7 +273,6 @@ xchk_xref_is_used_rt_space(
xfs_extlen_t len)
{
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
xfs_rtxnum_t startext;
xfs_rtxnum_t endext;
bool is_free;
@@ -224,6 +281,13 @@ xchk_xref_is_used_rt_space(
if (xchk_skip_xref(sc->sm))
return;
+ if (xfs_has_zoned(sc->mp)) {
+ if (!xfs_zone_rgbno_is_valid(rtg,
+ xfs_rtb_to_rgbno(sc->mp, rtbno) + len - 1))
+ xchk_ino_xref_set_corrupt(sc, rtg_rmap(rtg)->i_ino);
+ return;
+ }
+
startext = xfs_rtb_to_rtx(sc->mp, rtbno);
endext = xfs_rtb_to_rtx(sc->mp, rtbno + len - 1);
error = xfs_rtalloc_extent_is_free(rtg, sc->tp, startext,
@@ -231,5 +295,5 @@ xchk_xref_is_used_rt_space(
if (!xchk_should_check_xref(sc, &error, NULL))
return;
if (is_free)
- xchk_ino_xref_set_corrupt(sc, rbmip->i_ino);
+ xchk_ino_xref_set_corrupt(sc, rtg_bitmap(rtg)->i_ino);
}
diff --git a/fs/xfs/scrub/rtbitmap.h b/fs/xfs/scrub/rtbitmap.h
index 85304ff019e1..fe52b877253d 100644
--- a/fs/xfs/scrub/rtbitmap.h
+++ b/fs/xfs/scrub/rtbitmap.h
@@ -6,17 +6,72 @@
#ifndef __XFS_SCRUB_RTBITMAP_H__
#define __XFS_SCRUB_RTBITMAP_H__
+/*
+ * We use an xfile to construct new bitmap blocks for the portion of the
+ * rtbitmap file that we're replacing. Whereas the ondisk bitmap must be
+ * accessed through the buffer cache, the xfile bitmap supports direct
+ * word-level accesses. Therefore, we create a small abstraction for linear
+ * access.
+ */
+typedef unsigned long long xrep_wordoff_t;
+typedef unsigned int xrep_wordcnt_t;
+
+/* Mask to round an rtx down to the nearest bitmap word. */
+#define XREP_RTBMP_WORDMASK ((1ULL << XFS_NBWORDLOG) - 1)
+
+
struct xchk_rtbitmap {
+ struct xfs_scrub *sc;
+
uint64_t rextents;
uint64_t rbmblocks;
unsigned int rextslog;
unsigned int resblks;
+
+ /* The next free rt group block number that we expect to see. */
+ xfs_rgblock_t next_free_rgbno;
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ /* stuff for staging a new bitmap */
+ struct xfs_rtalloc_args args;
+ struct xrep_tempexch tempexch;
+#endif
+
+ /* The next rtgroup block we expect to see during our rtrmapbt walk. */
+ xfs_rgblock_t next_rgbno;
+
+ /* rtgroup lock flags */
+ unsigned int rtglock_flags;
+
+ /* rtword position of xfile as we write buffers to disk. */
+ xrep_wordoff_t prep_wordoff;
+
+	/* In-memory rtbitmap for repair. */
+ union xfs_rtword_raw words[];
};
#ifdef CONFIG_XFS_ONLINE_REPAIR
int xrep_setup_rtbitmap(struct xfs_scrub *sc, struct xchk_rtbitmap *rtb);
+
+/*
+ * How big should the words[] buffer be?
+ *
+ * For repairs, we want a full fsblock worth of space so that we can memcpy a
+ * buffer full of 1s into the xfile bitmap. The xfile bitmap doesn't have
+ * rtbitmap block headers, so we don't use blockwsize. Scrub doesn't use the
+ * words buffer at all.
+ */
+static inline unsigned int
+xchk_rtbitmap_wordcnt(
+ struct xfs_scrub *sc)
+{
+ if (xchk_could_repair(sc))
+ return sc->mp->m_sb.sb_blocksize >> XFS_WORDLOG;
+ return 0;
+}
#else
# define xrep_setup_rtbitmap(sc, rtb) (0)
+# define xchk_rtbitmap_wordcnt(sc) (0)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
#endif /* __XFS_SCRUB_RTBITMAP_H__ */
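
Concretely, with the common 4096-byte block size, xchk_rtbitmap_wordcnt() returns 4096 >> XFS_WORDLOG = 1024 32-bit bitmap words for a repair run, i.e. one fsblock's worth of rtwords describing 32768 rt extents; for scrub-only invocations it returns 0, so the flexible words[] array adds nothing to the allocation made in xchk_setup_rtbitmap().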
diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c
index 0fef98e9f834..203a1a97c502 100644
--- a/fs/xfs/scrub/rtbitmap_repair.c
+++ b/fs/xfs/scrub/rtbitmap_repair.c
@@ -12,32 +12,66 @@
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_exchmaps.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_extent_busy.h"
+#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
+#include "scrub/tempexch.h"
+#include "scrub/reap.h"
#include "scrub/rtbitmap.h"
-/* Set up to repair the realtime bitmap file metadata. */
+/* rt bitmap content repairs */
+
+/* Set up to repair the realtime bitmap for this group. */
int
xrep_setup_rtbitmap(
struct xfs_scrub *sc,
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
- unsigned long long blocks = 0;
+ char *descr;
+ unsigned long long blocks = mp->m_sb.sb_rbmblocks;
+ int error;
+
+ error = xrep_tempfile_create(sc, S_IFREG);
+ if (error)
+ return error;
+
+ /* Create an xfile to hold our reconstructed bitmap. */
+ descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
+ error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
+ kfree(descr);
+ if (error)
+ return error;
/*
- * Reserve enough blocks to write out a completely new bmbt for a
- * maximally fragmented bitmap file. We do not hold the rtbitmap
- * ILOCK yet, so this is entirely speculative.
+ * Reserve enough blocks to write out a completely new bitmap file,
+ * plus twice as many blocks as we would need if we can only allocate
+ * one block per data fork mapping. This should cover the
+ * preallocation of the temporary file and exchanging the extent
+ * mappings.
+ *
+ * We cannot use xfs_exchmaps_estimate because we have not yet
+ * constructed the replacement bitmap and therefore do not know how
+ * many extents it will use. By the time we do, we will have a dirty
+ * transaction (which we cannot drop because we cannot drop the
+ * rtbitmap ILOCK) and cannot ask for more reservation.
*/
- blocks = xfs_bmbt_calc_size(mp, mp->m_sb.sb_rbmblocks);
+ blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
if (blocks > UINT_MAX)
return -EOPNOTSUPP;
@@ -45,6 +79,325 @@ xrep_setup_rtbitmap(
return 0;
}
+static inline xrep_wordoff_t
+rtx_to_wordoff(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t rtx)
+{
+ return rtx >> XFS_NBWORDLOG;
+}
+
+static inline xrep_wordcnt_t
+rtxlen_to_wordcnt(
+ xfs_rtxlen_t rtxlen)
+{
+ return rtxlen >> XFS_NBWORDLOG;
+}
+
+/* Helper functions to record rtwords in an xfile. */
+
+static inline int
+xfbmp_load(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ xfs_rtword_t *word)
+{
+ union xfs_rtword_raw urk;
+ int error;
+
+ ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+ error = xfile_load(rtb->sc->xfile, &urk,
+ sizeof(union xfs_rtword_raw),
+ wordoff << XFS_WORDLOG);
+ if (error)
+ return error;
+
+ *word = be32_to_cpu(urk.rtg);
+ return 0;
+}
+
+static inline int
+xfbmp_store(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ const xfs_rtword_t word)
+{
+ union xfs_rtword_raw urk;
+
+ ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+ urk.rtg = cpu_to_be32(word);
+ return xfile_store(rtb->sc->xfile, &urk,
+ sizeof(union xfs_rtword_raw),
+ wordoff << XFS_WORDLOG);
+}
+
+static inline int
+xfbmp_copyin(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ const union xfs_rtword_raw *word,
+ xrep_wordcnt_t nr_words)
+{
+ return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
+ wordoff << XFS_WORDLOG);
+}
+
+static inline int
+xfbmp_copyout(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ union xfs_rtword_raw *word,
+ xrep_wordcnt_t nr_words)
+{
+ return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
+ wordoff << XFS_WORDLOG);
+}
+
+/* Perform a logical OR operation on an rtword in the incore bitmap. */
+static int
+xrep_rtbitmap_or(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ xfs_rtword_t mask)
+{
+ xfs_rtword_t word;
+ int error;
+
+ error = xfbmp_load(rtb, wordoff, &word);
+ if (error)
+ return error;
+
+ trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);
+
+ return xfbmp_store(rtb, wordoff, word | mask);
+}
+
+/*
+ * Mark as free every rt extent between the next rt block we expected to see
+ * in the rtrmap records and the given rt block.
+ */
+STATIC int
+xrep_rtbitmap_mark_free(
+ struct xchk_rtbitmap *rtb,
+ xfs_rgblock_t rgbno)
+{
+ struct xfs_mount *mp = rtb->sc->mp;
+ struct xchk_rt *sr = &rtb->sc->sr;
+ struct xfs_rtgroup *rtg = sr->rtg;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t nextrtx;
+ xrep_wordoff_t wordoff, nextwordoff;
+ unsigned int bit;
+ unsigned int bufwsize;
+ xfs_extlen_t mod;
+ xfs_rtword_t mask;
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
+ return -EFSCORRUPTED;
+
+ /*
+ * Convert rt blocks to rt extents. The block range we find must be
+ * aligned to an rtextent boundary on both ends.
+ */
+ startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
+ mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
+ if (mod)
+ return -EFSCORRUPTED;
+
+ nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
+ mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
+ if (mod != mp->m_sb.sb_rextsize - 1)
+ return -EFSCORRUPTED;
+
+ /* Must not be shared or CoW staging. */
+ if (sr->refc_cur) {
+ error = xfs_refcount_has_records(sr->refc_cur,
+ XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
+ rgbno - rtb->next_rgbno, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+
+ error = xfs_refcount_has_records(sr->refc_cur,
+ XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
+ rgbno - rtb->next_rgbno, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+ }
+
+ trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
+
+ /* Set bits as needed to round startrtx up to the nearest word. */
+ bit = startrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ xfs_rtblock_t len = nextrtx - startrtx;
+ unsigned int lastbit;
+
+ lastbit = min(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
+ mask);
+ if (error || lastbit - bit == len)
+ return error;
+ startrtx += XFS_NBWORD - bit;
+ }
+
+ /* Set bits as needed to round nextrtx down to the nearest word. */
+ bit = nextrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ mask = ((xfs_rtword_t)1 << bit) - 1;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
+ mask);
+ if (error || startrtx + bit == nextrtx)
+ return error;
+ nextrtx -= bit;
+ }
+
+ trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
+
+ /* Set all the words in between, up to a whole fs block at once. */
+ wordoff = rtx_to_wordoff(mp, startrtx);
+ nextwordoff = rtx_to_wordoff(mp, nextrtx);
+ bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
+
+ while (wordoff < nextwordoff) {
+ xrep_wordoff_t rem;
+ xrep_wordcnt_t wordcnt;
+
+ wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
+ bufwsize);
+
+ /*
+ * Try to keep us aligned to the rtwords buffer to reduce the
+ * number of xfile writes.
+ */
+ rem = wordoff & (bufwsize - 1);
+ if (rem)
+ wordcnt = min_t(xrep_wordcnt_t, wordcnt,
+ bufwsize - rem);
+
+ error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
+ if (error)
+ return error;
+
+ wordoff += wordcnt;
+ }
+
+ return 0;
+}
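+
+/*
+ * Worked example, assuming 32-bit bitmap words: to set rtx range
+ * [5, 70) we OR bits 5-31 into word 0 (the unaligned head), OR bits
+ * 0-5 into word 2 (the unaligned tail), and then bulk-copy all-ones
+ * words from the preformatted rtb->words buffer into the whole words
+ * in between (here just word 1).
+ */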
+
+/* Set free space in the rtbitmap based on rtrmapbt records. */
+STATIC int
+xrep_rtbitmap_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xchk_rtbitmap *rtb = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rtb->sc, &error))
+ return error;
+
+ if (rtb->next_rgbno < rec->rm_startblock) {
+ error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
+ if (error)
+ return error;
+ }
+
+ rtb->next_rgbno = max(rtb->next_rgbno,
+ rec->rm_startblock + rec->rm_blockcount);
+ return 0;
+}
+
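+/*
+ * For instance (a hypothetical walk, assuming rtx-aligned records): if
+ * the rtrmapbt contains only [0, 10) and [25, 40), the callback above
+ * marks nothing at the first record, marks [10, 25) free when it sees
+ * the second, and xrep_rtbitmap_find_freespace() below marks [40, end
+ * of group) free once the walk completes.
+ */
+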
+/*
+ * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
+ * in the realtime bitmap that we're computing.
+ */
+STATIC int
+xrep_rtbitmap_find_freespace(
+ struct xchk_rtbitmap *rtb)
+{
+ struct xfs_scrub *sc = rtb->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ uint64_t blockcount;
+ int error;
+
+ /* Prepare a buffer of ones so that we can accelerate bulk setting. */
+ memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
+
+ xrep_rtgroup_btcur_init(sc, &sc->sr);
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
+ rtb);
+ if (error)
+ goto out;
+
+ /*
+ * Mark as free every possible rt extent from the last one we saw to
+ * the end of the rt group.
+ */
+ blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ if (rtb->next_rgbno < blockcount) {
+ error = xrep_rtbitmap_mark_free(rtb, blockcount);
+ if (error)
+ goto out;
+ }
+
+out:
+ xchk_rtgroup_btcur_free(&sc->sr);
+ return error;
+}
+
+static int
+xrep_rtbitmap_prep_buf(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp,
+ void *data)
+{
+ struct xchk_rtbitmap *rtb = data;
+ struct xfs_mount *mp = sc->mp;
+ union xfs_rtword_raw *ondisk;
+ int error;
+
+ rtb->args.mp = sc->mp;
+ rtb->args.tp = sc->tp;
+ rtb->args.rbmbp = bp;
+ ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
+ rtb->args.rbmbp = NULL;
+
+ error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
+ mp->m_blockwsize);
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(sc->mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ bp->b_ops = &xfs_rtbitmap_buf_ops;
+ } else {
+ bp->b_ops = &xfs_rtbuf_ops;
+ }
+
+ rtb->prep_wordoff += mp->m_blockwsize;
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
+ return 0;
+}
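+
+/*
+ * Note that the xfile is packed (it has no block headers), whereas each
+ * ondisk bitmap block on an rtgroups filesystem begins with a struct
+ * xfs_rtbuf_blkinfo header. m_blockwsize is the per-block payload word
+ * count, which is why prep_wordoff advances by that amount rather than
+ * by a full block's worth of words.
+ */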
+
/*
* Make sure that the given range of the data fork of the realtime file is
* mapped to written blocks. The caller must ensure that the inode is joined
@@ -160,9 +513,18 @@ xrep_rtbitmap(
{
struct xchk_rtbitmap *rtb = sc->buf;
struct xfs_mount *mp = sc->mp;
+ struct xfs_group *xg = rtg_group(sc->sr.rtg);
unsigned long long blocks = 0;
+ unsigned int busy_gen;
int error;
+ /* We require the realtime rmapbt to rebuild anything. */
+ if (!xfs_has_rtrmapbt(sc->mp))
+ return -EOPNOTSUPP;
+
+ /* We require atomic file exchange range to rebuild anything. */
+ if (!xfs_has_exchange_range(sc->mp))
+ return -EOPNOTSUPP;
+
/* Impossibly large rtbitmap means we can't touch the filesystem. */
if (rtb->rbmblocks > U32_MAX)
return 0;
@@ -195,6 +557,79 @@ xrep_rtbitmap(
if (error)
return error;
- /* Fix inconsistent bitmap geometry */
- return xrep_rtbitmap_geometry(sc, rtb);
+ /*
+ * Fix inconsistent bitmap geometry. This function returns with a
+ * clean scrub transaction.
+ */
+ error = xrep_rtbitmap_geometry(sc, rtb);
+ if (error)
+ return error;
+
+ /*
+ * Make sure the busy extent list is clear because we can't put extents
+ * on there twice.
+ */
+ if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
+ error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Generate the new rtbitmap data. We don't need the rtbmp information
+ * once this call is finished.
+ */
+ error = xrep_rtbitmap_find_freespace(rtb);
+ if (error)
+ return error;
+
+ /*
+ * Try to take ILOCK_EXCL of the temporary file. We had better be the
+ * only ones holding onto this inode, but we can't block while holding
+ * the rtbitmap file's ILOCK_EXCL.
+ */
+ while (!xrep_tempfile_ilock_nowait(sc)) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+ delay(1);
+ }
+
+ /*
+ * Make sure we have space allocated for the part of the bitmap
+ * file that corresponds to this group. We already joined sc->ip.
+ */
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
+ if (error)
+ return error;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ /* Copy the bitmap file that we generated. */
+ error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
+ xrep_rtbitmap_prep_buf, rtb);
+ if (error)
+ return error;
+ error = xrep_tempfile_set_isize(sc,
+ XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
+ if (error)
+ return error;
+
+ /*
+ * Now exchange the data fork contents. We're done with the temporary
+ * buffer, so we can reuse it for the tempfile exchmaps information.
+ */
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
+ rtb->rbmblocks, &rtb->tempexch);
+ if (error)
+ return error;
+
+ error = xrep_tempexch_contents(sc, &rtb->tempexch);
+ if (error)
+ return error;
+
+ /* Free the old rtbitmap blocks if they're not in use. */
+ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}
diff --git a/fs/xfs/scrub/rtrefcount.c b/fs/xfs/scrub/rtrefcount.c
new file mode 100644
index 000000000000..4c5dffc73641
--- /dev/null
+++ b/fs/xfs/scrub/rtrefcount.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trans.h"
+#include "xfs_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_refcount.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_metafile.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_rtalloc.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/repair.h"
+
+/* Set us up with the realtime refcount metadata locked. */
+int
+xchk_setup_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
+ if (xchk_could_repair(sc)) {
+ error = xrep_setup_rtrefcountbt(sc);
+ if (error)
+ return error;
+ }
+
+ error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
+ if (error)
+ return error;
+
+ error = xchk_setup_rt(sc);
+ if (error)
+ return error;
+
+ error = xchk_install_live_inode(sc, rtg_refcount(sc->sr.rtg));
+ if (error)
+ return error;
+
+ return xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+}
+
+/* Realtime reference count btree scrubber. */
+
+/*
+ * Confirming Reference Counts via Reverse Mappings
+ *
+ * We want to count the reverse mappings overlapping a refcount record
+ * (bno, len, refcount), allowing for the possibility that some of the
+ * overlap may come from smaller adjoining reverse mappings, while some
+ * comes from single extents which overlap the range entirely. The
+ * outer loop is as follows:
+ *
+ * 1. For all reverse mappings overlapping the refcount extent,
+ * a. If a given rmap completely overlaps, mark it as seen.
+ * b. Otherwise, record the fragment (in rgbno order) for later
+ * processing.
+ *
+ * Once we've seen all the rmaps, we know that for all blocks in the
+ * refcount record we want to find $refcount owners and we've already
+ * visited $seen extents that overlap all the blocks. Therefore, we
+ * need to find ($refcount - $seen) owners for every block in the
+ * extent; call that quantity $target_nr. Proceed as follows:
+ *
+ * 2. Pull the first $target_nr fragments from the list; all of them
+ * should start at or before the start of the extent.
+ * Call this subset of fragments the working set.
+ * 3. Until there are no more unprocessed fragments,
+ * a. Find the shortest fragments in the set and remove them.
+ * b. Note the block number of the end of these fragments.
+ * c. Pull the same number of fragments from the list. All of these
+ * fragments should start at the block number recorded in the
+ * previous step.
+ * d. Put those fragments in the set.
+ * 4. Check that there are $target_nr fragments remaining in the list,
+ * and that they all end at or beyond the end of the refcount extent.
+ *
+ * If the refcount is correct, all the check conditions in the algorithm
+ * should always hold true. If not, the refcount is incorrect.
+ */
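+
+/*
+ * Worked example: for a refcount record (bno 10, len 10, refcount 3),
+ * suppose the rmapbt holds one rmap [10, 20) covering the whole record
+ * ($seen = 1, so $target_nr = 2) plus fragments [8, 15), [10, 15),
+ * [15, 20) and [15, 22). Step 2 pulls [8, 15) and [10, 15) into the
+ * working set; step 3 discards both at block 15 and replaces them with
+ * [15, 20) and [15, 22). Both survivors end at or beyond block 20, so
+ * the refcount of 3 checks out.
+ */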
+struct xchk_rtrefcnt_frag {
+ struct list_head list;
+ struct xfs_rmap_irec rm;
+};
+
+struct xchk_rtrefcnt_check {
+ struct xfs_scrub *sc;
+ struct list_head fragments;
+
+ /* refcount extent we're examining */
+ xfs_rgblock_t bno;
+ xfs_extlen_t len;
+ xfs_nlink_t refcount;
+
+ /* number of owners seen */
+ xfs_nlink_t seen;
+};
+
+/*
+ * Decide if the given rmap is large enough that we can redeem it
+ * towards refcount verification now, or if it's a fragment, in
+ * which case we'll hang onto it in the hopes that we'll later
+ * discover that we've collected exactly the correct number of
+ * fragments as the rtrefcountbt says we should have.
+ */
+STATIC int
+xchk_rtrefcountbt_rmap_check(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xchk_rtrefcnt_check *refchk = priv;
+ struct xchk_rtrefcnt_frag *frag;
+ xfs_rgblock_t rm_last;
+ xfs_rgblock_t rc_last;
+ int error = 0;
+
+ if (xchk_should_terminate(refchk->sc, &error))
+ return error;
+
+ rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
+ rc_last = refchk->bno + refchk->len - 1;
+
+ /* Confirm that a single-owner refc extent is a CoW stage. */
+ if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
+ xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
+ return 0;
+ }
+
+ if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
+ /*
+ * The rmap overlaps the refcount record, so we can confirm
+ * one refcount owner seen.
+ */
+ refchk->seen++;
+ } else {
+ /*
+ * This rmap covers only part of the refcount record, so
+ * save the fragment for later processing. If the rmapbt
+ * is healthy each rmap_irec we see will be in rgbno order
+ * so we don't need insertion sort here.
+ */
+ frag = kmalloc(sizeof(struct xchk_rtrefcnt_frag),
+ XCHK_GFP_FLAGS);
+ if (!frag)
+ return -ENOMEM;
+ memcpy(&frag->rm, rec, sizeof(frag->rm));
+ list_add_tail(&frag->list, &refchk->fragments);
+ }
+
+ return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount. If this ever deviates from
+ * what we expect (which is the rtrefcountbt's refcount minus the
+ * number of extents that totally covered the rtrefcountbt extent),
+ * we have a rtrefcountbt error.
+ */
+STATIC void
+xchk_rtrefcountbt_process_rmap_fragments(
+ struct xchk_rtrefcnt_check *refchk)
+{
+ struct list_head worklist;
+ struct xchk_rtrefcnt_frag *frag;
+ struct xchk_rtrefcnt_frag *n;
+ xfs_rgblock_t bno;
+ xfs_rgblock_t rbno;
+ xfs_rgblock_t next_rbno;
+ xfs_nlink_t nr;
+ xfs_nlink_t target_nr;
+
+ target_nr = refchk->refcount - refchk->seen;
+ if (target_nr == 0)
+ return;
+
+ /*
+ * There are (refchk->refcount - refchk->seen)
+ * references we haven't found yet. Pull that many off the
+ * fragment list and figure out where the smallest rmap ends
+ * (and therefore the next rmap should start). All the rmaps
+ * we pull off should start at or before the beginning of the
+ * refcount record's range.
+ */
+ INIT_LIST_HEAD(&worklist);
+ rbno = NULLRGBLOCK;
+
+ /* Make sure the fragments actually /are/ in bno order. */
+ bno = 0;
+ list_for_each_entry(frag, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock < bno)
+ goto done;
+ bno = frag->rm.rm_startblock;
+ }
+
+ /*
+ * Find all the rmaps that start at or before the refc extent,
+ * and put them on the worklist.
+ */
+ nr = 0;
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
+ break;
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno < rbno)
+ rbno = bno;
+ list_move_tail(&frag->list, &worklist);
+ nr++;
+ }
+
+ /*
+ * We should have found exactly $target_nr rmap fragments starting
+ * at or before the refcount extent.
+ */
+ if (nr != target_nr)
+ goto done;
+
+ while (!list_empty(&refchk->fragments)) {
+ /* Discard any fragments ending at rbno from the worklist. */
+ nr = 0;
+ next_rbno = NULLRGBLOCK;
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno != rbno) {
+ if (bno < next_rbno)
+ next_rbno = bno;
+ continue;
+ }
+ list_del(&frag->list);
+ kfree(frag);
+ nr++;
+ }
+
+ /* Try to add nr rmaps starting at rbno to the worklist. */
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (frag->rm.rm_startblock != rbno)
+ goto done;
+ list_move_tail(&frag->list, &worklist);
+ if (next_rbno > bno)
+ next_rbno = bno;
+ nr--;
+ if (nr == 0)
+ break;
+ }
+
+ /*
+ * If we get here and nr > 0, this means that we added fewer
+ * items to the worklist than we discarded because the fragment
+ * list ran out of items. Therefore, we cannot maintain the
+ * required refcount. Something is wrong, so we're done.
+ */
+ if (nr)
+ goto done;
+
+ rbno = next_rbno;
+ }
+
+ /*
+ * Make sure the last extent we processed ends at or beyond
+ * the end of the refcount extent.
+ */
+ if (rbno < refchk->bno + refchk->len)
+ goto done;
+
+ /* Actually record us having seen the remaining refcount. */
+ refchk->seen = refchk->refcount;
+done:
+ /* Delete fragments and work list. */
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ list_del(&frag->list);
+ kfree(frag);
+ }
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ list_del(&frag->list);
+ kfree(frag);
+ }
+}
+
+/* Use the rmap entries covering this extent to verify the refcount. */
+STATIC void
+xchk_rtrefcountbt_xref_rmap(
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *irec)
+{
+ struct xchk_rtrefcnt_check refchk = {
+ .sc = sc,
+ .bno = irec->rc_startblock,
+ .len = irec->rc_blockcount,
+ .refcount = irec->rc_refcount,
+ .seen = 0,
+ };
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+ struct xchk_rtrefcnt_frag *frag;
+ struct xchk_rtrefcnt_frag *n;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ /* Cross-reference with the rmapbt to confirm the refcount. */
+ memset(&low, 0, sizeof(low));
+ low.rm_startblock = irec->rc_startblock;
+ memset(&high, 0xFF, sizeof(high));
+ high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1;
+
+ INIT_LIST_HEAD(&refchk.fragments);
+ error = xfs_rmap_query_range(sc->sr.rmap_cur, &low, &high,
+ xchk_rtrefcountbt_rmap_check, &refchk);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ goto out_free;
+
+ xchk_rtrefcountbt_process_rmap_fragments(&refchk);
+ if (irec->rc_refcount != refchk.seen)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+
+out_free:
+ list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
+ list_del(&frag->list);
+ kfree(frag);
+ }
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xchk_rtrefcountbt_xref(
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *irec)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xchk_xref_is_used_rt_space(sc,
+ xfs_rgbno_to_rtb(sc->sr.rtg, irec->rc_startblock),
+ irec->rc_blockcount);
+ xchk_rtrefcountbt_xref_rmap(sc, irec);
+}
+
+struct xchk_rtrefcbt_records {
+ /* Previous refcount record. */
+ struct xfs_refcount_irec prev_rec;
+
+ /* The next rtgroup block where we aren't expecting shared extents. */
+ xfs_rgblock_t next_unshared_rgbno;
+
+ /* Number of CoW blocks we expect. */
+ xfs_extlen_t cow_blocks;
+
+ /* Was the last record a shared or CoW staging extent? */
+ enum xfs_refc_domain prev_domain;
+};
+
+static inline bool
+xchk_rtrefcount_mergeable(
+ struct xchk_rtrefcbt_records *rrc,
+ const struct xfs_refcount_irec *r2)
+{
+ const struct xfs_refcount_irec *r1 = &rrc->prev_rec;
+
+ /* Ignore if prev_rec is not yet initialized. */
+ if (r1->rc_blockcount == 0)
+ return false;
+
+ if (r1->rc_startblock + r1->rc_blockcount != r2->rc_startblock)
+ return false;
+ if (r1->rc_refcount != r2->rc_refcount)
+ return false;
+ if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount >
+ XFS_REFC_LEN_MAX)
+ return false;
+
+ return true;
+}
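+
+/*
+ * For example, same-domain records (startblock 10, blockcount 4,
+ * refcount 2) and (startblock 14, blockcount 6, refcount 2) should have
+ * been written as the single record (10, 10, 2), so seeing both is
+ * flagged as corruption.
+ */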
+
+/* Flag failures for records that could be merged. */
+STATIC void
+xchk_rtrefcountbt_check_mergeable(
+ struct xchk_btree *bs,
+ struct xchk_rtrefcbt_records *rrc,
+ const struct xfs_refcount_irec *irec)
+{
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ if (xchk_rtrefcount_mergeable(rrc, irec))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ memcpy(&rrc->prev_rec, irec, sizeof(struct xfs_refcount_irec));
+}
+
+STATIC int
+xchk_rtrefcountbt_rmap_check_gap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ xfs_rgblock_t *next_bno = priv;
+
+ if (*next_bno != NULLRGBLOCK && rec->rm_startblock < *next_bno)
+ return -ECANCELED;
+
+ *next_bno = rec->rm_startblock + rec->rm_blockcount;
+ return 0;
+}
+
+/*
+ * Make sure that a gap in the reference count records does not correspond to
+ * overlapping records (i.e. shared extents) in the reverse mappings.
+ */
+static inline void
+xchk_rtrefcountbt_xref_gaps(
+ struct xfs_scrub *sc,
+ struct xchk_rtrefcbt_records *rrc,
+ xfs_rtblock_t bno)
+{
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+ xfs_rgblock_t next_bno = NULLRGBLOCK;
+ int error;
+
+ if (bno <= rrc->next_unshared_rgbno || !sc->sr.rmap_cur ||
+ xchk_skip_xref(sc->sm))
+ return;
+
+ memset(&low, 0, sizeof(low));
+ low.rm_startblock = rrc->next_unshared_rgbno;
+ memset(&high, 0xFF, sizeof(high));
+ high.rm_startblock = bno - 1;
+
+ error = xfs_rmap_query_range(sc->sr.rmap_cur, &low, &high,
+ xchk_rtrefcountbt_rmap_check_gap, &next_bno);
+ if (error == -ECANCELED)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+ else
+ xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur);
+}
+
+/* Scrub a rtrefcountbt record. */
+STATIC int
+xchk_rtrefcountbt_rec(
+ struct xchk_btree *bs,
+ const union xfs_btree_rec *rec)
+{
+ struct xfs_mount *mp = bs->cur->bc_mp;
+ struct xchk_rtrefcbt_records *rrc = bs->private;
+ struct xfs_refcount_irec irec;
+ u32 mod;
+
+ xfs_refcount_btrec_to_irec(rec, &irec);
+ if (xfs_rtrefcount_check_irec(to_rtg(bs->cur->bc_group), &irec) !=
+ NULL) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return 0;
+ }
+
+ /* We can only share full rt extents. */
+ mod = xfs_rgbno_to_rtxoff(mp, irec.rc_startblock);
+ if (mod)
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ mod = xfs_extlen_to_rtxmod(mp, irec.rc_blockcount);
+ if (mod)
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ if (irec.rc_domain == XFS_REFC_DOMAIN_COW)
+ rrc->cow_blocks += irec.rc_blockcount;
+
+ /* Shared records always come before CoW records. */
+ if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED &&
+ rrc->prev_domain == XFS_REFC_DOMAIN_COW)
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ rrc->prev_domain = irec.rc_domain;
+
+ xchk_rtrefcountbt_check_mergeable(bs, rrc, &irec);
+ xchk_rtrefcountbt_xref(bs->sc, &irec);
+
+ /*
+ * If this is a record for a shared extent, check that all blocks
+ * between the previous record and this one have at most one reverse
+ * mapping.
+ */
+ if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED) {
+ xchk_rtrefcountbt_xref_gaps(bs->sc, rrc, irec.rc_startblock);
+ rrc->next_unshared_rgbno = irec.rc_startblock +
+ irec.rc_blockcount;
+ }
+
+ return 0;
+}
+
+/* Make sure we have as many refc blocks as the rmap says. */
+STATIC void
+xchk_refcount_xref_rmap(
+ struct xfs_scrub *sc,
+ const struct xfs_owner_info *btree_oinfo,
+ xfs_extlen_t cow_blocks)
+{
+ xfs_filblks_t refcbt_blocks = 0;
+ xfs_filblks_t blocks;
+ int error;
+
+ if (!sc->sr.rmap_cur || !sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ /* Check that we saw as many refcbt blocks as the rmap knows about. */
+ error = xfs_btree_count_blocks(sc->sr.refc_cur, &refcbt_blocks);
+ if (!xchk_btree_process_error(sc, sc->sr.refc_cur, 0, &error))
+ return;
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, btree_oinfo,
+ &blocks);
+ if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != refcbt_blocks)
+ xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+ /* Check that we saw as many cow blocks as the rmap knows about. */
+ error = xchk_count_rmap_ownedby_ag(sc, sc->sr.rmap_cur,
+ &XFS_RMAP_OINFO_COW, &blocks);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (blocks != cow_blocks)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
+
+/* Scrub the realtime refcount btree. */
+int
+xchk_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ struct xfs_owner_info btree_oinfo;
+ struct xchk_rtrefcbt_records rrc = {
+ .cow_blocks = 0,
+ .next_unshared_rgbno = 0,
+ .prev_domain = XFS_REFC_DOMAIN_SHARED,
+ };
+ int error;
+
+ error = xchk_metadata_inode_forks(sc);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+
+ xfs_rmap_ino_bmbt_owner(&btree_oinfo, rtg_refcount(sc->sr.rtg)->i_ino,
+ XFS_DATA_FORK);
+ error = xchk_btree(sc, sc->sr.refc_cur, xchk_rtrefcountbt_rec,
+ &btree_oinfo, &rrc);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+
+ /*
+ * Check that all blocks between the last refcount > 1 record and the
+ * end of the rt volume have at most one reverse mapping.
+ */
+ xchk_rtrefcountbt_xref_gaps(sc, &rrc, sc->mp->m_sb.sb_rblocks);
+
+ xchk_refcount_xref_rmap(sc, &btree_oinfo, rrc.cow_blocks);
+
+ return 0;
+}
+
+/* xref check that a cow staging extent is marked in the rtrefcountbt. */
+void
+xchk_xref_is_rt_cow_staging(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ struct xfs_refcount_irec rc;
+ int has_refcount;
+ int error;
+
+ if (!sc->sr.refc_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ /* Find the CoW staging extent. */
+ error = xfs_refcount_lookup_le(sc->sr.refc_cur, XFS_REFC_DOMAIN_COW,
+ bno, &has_refcount);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.refc_cur))
+ return;
+ if (!has_refcount) {
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+ return;
+ }
+
+ error = xfs_refcount_get_rec(sc->sr.refc_cur, &rc, &has_refcount);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.refc_cur))
+ return;
+ if (!has_refcount) {
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+ return;
+ }
+
+ /* CoW lookup returned a shared extent record? */
+ if (rc.rc_domain != XFS_REFC_DOMAIN_COW)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+
+ /* Must be at least as long as what was passed in */
+ if (rc.rc_blockcount < len)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+}
+
+/*
+ * xref check that the extent is not shared. Only file data blocks
+ * can have multiple owners.
+ */
+void
+xchk_xref_is_not_rt_shared(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.refc_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_refcount_has_records(sc->sr.refc_cur,
+ XFS_REFC_DOMAIN_SHARED, bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.refc_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+}
+
+/* xref check that the extent is not being used for CoW staging. */
+void
+xchk_xref_is_not_rt_cow_staging(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.refc_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_refcount_has_records(sc->sr.refc_cur, XFS_REFC_DOMAIN_COW,
+ bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.refc_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+}
diff --git a/fs/xfs/scrub/rtrefcount_repair.c b/fs/xfs/scrub/rtrefcount_repair.c
new file mode 100644
index 000000000000..983362447826
--- /dev/null
+++ b/fs/xfs/scrub/rtrefcount_repair.c
@@ -0,0 +1,761 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_btree_staging.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_rtrefcount_btree.h"
+#include "xfs_error.h"
+#include "xfs_health.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
+#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/fsb_bitmap.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/newbt.h"
+#include "scrub/reap.h"
+#include "scrub/rcbag.h"
+
+/*
+ * Rebuilding the Reference Count Btree
+ * ====================================
+ *
+ * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
+ * entries as rectangles representing extents of physical blocks, and
+ * that the rectangles can be laid down to allow them to overlap each
+ * other; then we know that we must emit a refcnt btree entry wherever
+ * the amount of overlap changes, i.e. the emission stimulus is
+ * level-triggered:
+ *
+ * - ---
+ * -- ----- ---- --- ------
+ * -- ---- ----------- ---- ---------
+ * -------------------------------- -----------
+ * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
+ * 2 1 23 21 3 43 234 2123 1 01 2 3 0
+ *
+ * For our purposes, an rmap is a tuple (startblock, len, fileoff, owner).
+ *
+ * Note that in the actual refcnt btree we don't store the refcount < 2
+ * cases because the rt bitmap tells us which extents are free; single-use
+ * blocks aren't recorded in the rt bitmap or the refcntbt. If the rmapbt
+ * supports storing multiple entries covering a given block we could
+ * theoretically dispense with the refcntbt and simply count rmaps, but
+ * that's inefficient in the (hot) write path, so we'll take the cost of
+ * the extra tree to save time. Also there's no guarantee that rmap
+ * will be enabled.
+ *
+ * Given an array of rmaps sorted by physical block number, a starting
+ * physical block (sp), a bag to hold rmaps that cover sp, and the next
+ * physical block where the level changes (np), we can reconstruct the
+ * rt refcount btree as follows:
+ *
+ * While there are still unprocessed rmaps in the array,
+ * - Set sp to the physical block (pblk) of the next unprocessed rmap.
+ * - Add to the bag all rmaps in the array where startblock == sp.
+ * - Set np to the physical block where the bag size will change. This
+ * is the minimum of (the pblk of the next unprocessed rmap) and
+ * (startblock + len of each rmap in the bag).
+ * - Record the bag size as old_bag_size.
+ *
+ * - While the bag isn't empty,
+ * - Remove from the bag all rmaps where startblock + len == np.
+ * - Add to the bag all rmaps in the array where startblock == np.
+ * - If the bag size isn't old_bag_size, store the refcount entry
+ * (sp, np - sp, bag_size) in the refcnt btree.
+ * - If the bag is empty, break out of the inner loop.
+ * - Set old_bag_size to the bag size
+ * - Set sp = np.
+ * - Set np to the physical block where the bag size will change.
+ * This is the minimum of (the pblk of the next unprocessed rmap)
+ * and (startblock + len of each rmap in the bag).
+ *
+ * Like all the other repairers, we make a list of all the refcount
+ * records we need, then reinitialize the rt refcount btree root and
+ * insert all the records.
+ */
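+
+/*
+ * Worked example: given rmaps [0, 6), [0, 6) and [4, 10), the walk
+ * starts with sp = 0 and a bag holding both [0, 6) rmaps. The level
+ * changes at np = 4, where [4, 10) joins the bag (size 2 -> 3), so we
+ * emit (0, 4, 2); it changes again at np = 6, where both [0, 6) rmaps
+ * leave (size 3 -> 1), so we emit (4, 2, 3). The final transition at
+ * block 10 would emit (6, 4, 1), but refcount < 2 records are never
+ * stored, so the new btree gets just (0, 4, 2) and (4, 2, 3).
+ */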
+
+struct xrep_rtrefc {
+ /* refcount extents */
+ struct xfarray *refcount_records;
+
+ /* new refcountbt information */
+ struct xrep_newbt new_btree;
+
+ /* old refcountbt blocks */
+ struct xfsb_bitmap old_rtrefcountbt_blocks;
+
+ struct xfs_scrub *sc;
+
+ /* get_records()'s position in the rt refcount record array. */
+ xfarray_idx_t array_cur;
+
+ /* # of refcountbt blocks */
+ xfs_filblks_t btblocks;
+};
+
+/* Set us up to repair refcount btrees. */
+int
+xrep_setup_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ char *descr;
+ int error;
+
+ descr = xchk_xfile_ag_descr(sc, "rmap record bag");
+ error = xrep_setup_xfbtree(sc, descr);
+ kfree(descr);
+ return error;
+}
+
+/* Check for any obvious conflicts with this shared/CoW staging extent. */
+STATIC int
+xrep_rtrefc_check_ext(
+ struct xfs_scrub *sc,
+ const struct xfs_refcount_irec *rec)
+{
+ xfs_rgblock_t last;
+
+ if (xfs_rtrefcount_check_irec(sc->sr.rtg, rec) != NULL)
+ return -EFSCORRUPTED;
+
+ if (xfs_rgbno_to_rtxoff(sc->mp, rec->rc_startblock) != 0)
+ return -EFSCORRUPTED;
+
+ last = rec->rc_startblock + rec->rc_blockcount - 1;
+ if (xfs_rgbno_to_rtxoff(sc->mp, last) != sc->mp->m_sb.sb_rextsize - 1)
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space or misaligned. */
+ return xrep_require_rtext_inuse(sc, rec->rc_startblock,
+ rec->rc_blockcount);
+}
+
+/* Record a reference count extent. */
+STATIC int
+xrep_rtrefc_stash(
+ struct xrep_rtrefc *rr,
+ enum xfs_refc_domain domain,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len,
+ uint64_t refcount)
+{
+ struct xfs_refcount_irec irec = {
+ .rc_startblock = bno,
+ .rc_blockcount = len,
+ .rc_refcount = refcount,
+ .rc_domain = domain,
+ };
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);
+
+ error = xrep_rtrefc_check_ext(rr->sc, &irec);
+ if (error)
+ return error;
+
+ trace_xrep_refc_found(rtg_group(rr->sc->sr.rtg), &irec);
+
+ return xfarray_append(rr->refcount_records, &irec);
+}
+
+/* Record a CoW staging extent. */
+STATIC int
+xrep_rtrefc_stash_cow(
+ struct xrep_rtrefc *rr,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ return xrep_rtrefc_stash(rr, XFS_REFC_DOMAIN_COW, bno, len, 1);
+}
+
+/* Decide if an rmap could describe a shared extent. */
+static inline bool
+xrep_rtrefc_rmap_shareable(
+ const struct xfs_rmap_irec *rmap)
+{
+ /* rt metadata are never shareable */
+ if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ return false;
+
+ /* Unwritten file blocks are not shareable. */
+ if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
+ return false;
+
+ return true;
+}
+
+/* Grab the next (abbreviated) rmap record from the rmapbt. */
+STATIC int
+xrep_rtrefc_walk_rmaps(
+ struct xrep_rtrefc *rr,
+ struct xfs_rmap_irec *rmap,
+ bool *have_rec)
+{
+ struct xfs_btree_cur *cur = rr->sc->sr.rmap_cur;
+ struct xfs_mount *mp = cur->bc_mp;
+ int have_gt;
+ int error = 0;
+
+ *have_rec = false;
+
+ /*
+ * Loop through the remaining rmaps. Remember CoW staging
+ * extents and the refcountbt blocks from the old tree for later
+ * disposal. We can only share written data fork extents, so
+ * keep looping until we find an rmap for one.
+ */
+ do {
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ error = xfs_btree_increment(cur, 0, &have_gt);
+ if (error)
+ return error;
+ if (!have_gt)
+ return 0;
+
+ error = xfs_rmap_get_rec(cur, rmap, &have_gt);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(mp, !have_gt)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+
+ if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
+ error = xrep_rtrefc_stash_cow(rr, rmap->rm_startblock,
+ rmap->rm_blockcount);
+ if (error)
+ return error;
+ } else if (xfs_is_sb_inum(mp, rmap->rm_owner) ||
+ (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
+ XFS_RMAP_BMBT_BLOCK))) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+ } while (!xrep_rtrefc_rmap_shareable(rmap));
+
+ *have_rec = true;
+ return 0;
+}
+
+static inline uint32_t
+xrep_rtrefc_encode_startblock(
+ const struct xfs_refcount_irec *irec)
+{
+ uint32_t start;
+
+ start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
+ if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
+ start |= XFS_REFC_COWFLAG;
+
+ return start;
+}
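+
+/*
+ * Because XFS_REFC_COWFLAG is the high bit of the encoded startblock,
+ * this encoding sorts every CoW staging record after all of the shared
+ * records, matching the ondisk key order.
+ */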
+
+/*
+ * Compare two refcount records. We want to sort in order of increasing block
+ * number.
+ */
+static int
+xrep_rtrefc_extent_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xfs_refcount_irec *ap = a;
+ const struct xfs_refcount_irec *bp = b;
+ uint32_t sa, sb;
+
+ sa = xrep_rtrefc_encode_startblock(ap);
+ sb = xrep_rtrefc_encode_startblock(bp);
+
+ if (sa > sb)
+ return 1;
+ if (sa < sb)
+ return -1;
+ return 0;
+}
+
+/*
+ * Sort the refcount extents by startblock or else the btree records will be in
+ * the wrong order. Make sure the records do not overlap in physical space.
+ */
+STATIC int
+xrep_rtrefc_sort_records(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_refcount_irec irec;
+ xfarray_idx_t cur;
+ enum xfs_refc_domain dom = XFS_REFC_DOMAIN_SHARED;
+ xfs_rgblock_t next_rgbno = 0;
+ int error;
+
+ error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp,
+ XFARRAY_SORT_KILLABLE);
+ if (error)
+ return error;
+
+ foreach_xfarray_idx(rr->refcount_records, cur) {
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ error = xfarray_load(rr->refcount_records, cur, &irec);
+ if (error)
+ return error;
+
+ if (dom == XFS_REFC_DOMAIN_SHARED &&
+ irec.rc_domain == XFS_REFC_DOMAIN_COW) {
+ dom = irec.rc_domain;
+ next_rgbno = 0;
+ }
+
+ if (dom != irec.rc_domain)
+ return -EFSCORRUPTED;
+ if (irec.rc_startblock < next_rgbno)
+ return -EFSCORRUPTED;
+
+ next_rgbno = irec.rc_startblock + irec.rc_blockcount;
+ }
+
+ return error;
+}
+
+/* Record extents that belong to the realtime refcount inode. */
+STATIC int
+xrep_rtrefc_walk_rmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != rr->sc->ip->i_ino)
+ return 0;
+
+ error = xrep_check_ino_btree_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ return xfsb_bitmap_set(&rr->old_rtrefcountbt_blocks,
+ xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
+ rec->rm_blockcount);
+}
+
+/*
+ * Walk forward through the rmap btree to collect all rmaps starting at
+ * @bno in @rcstack. These represent the file(s) that share ownership of
+ * the current block. Upon return, the rmap cursor points to the last record
+ * satisfying the startblock constraint.
+ */
+static int
+xrep_rtrefc_push_rmaps_at(
+ struct xrep_rtrefc *rr,
+ struct rcbag *rcstack,
+ xfs_rgblock_t bno,
+ struct xfs_rmap_irec *rmap,
+ bool *have)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int have_gt;
+ int error;
+
+ while (*have && rmap->rm_startblock == bno) {
+ error = rcbag_add(rcstack, rr->sc->tp, rmap);
+ if (error)
+ return error;
+
+ error = xrep_rtrefc_walk_rmaps(rr, rmap, have);
+ if (error)
+ return error;
+ }
+
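+ /*
+ * xrep_rtrefc_walk_rmaps pre-increments the cursor, so the loop above
+ * leaves it pointing at the first rmap that does not start at @bno.
+ * Back up one record so the next walk returns that rmap again.
+ */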
+ error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
+ xfs_btree_mark_sick(sc->sr.rmap_cur);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/* Scan one AG for reverse mappings for the realtime refcount btree. */
+STATIC int
+xrep_rtrefc_scan_ag(
+ struct xrep_rtrefc *rr,
+ struct xfs_perag *pag)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ error = xrep_ag_init(sc, pag, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
+/* Iterate all the rmap records to generate reference count data. */
+STATIC int
+xrep_rtrefc_find_refcounts(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct rcbag *rcstack;
+ struct xfs_perag *pag = NULL;
+ uint64_t old_stack_height;
+ xfs_rgblock_t sbno;
+ xfs_rgblock_t cbno;
+ xfs_rgblock_t nbno;
+ bool have;
+ int error;
+
+ /* Scan for old rtrefc btree blocks. */
+ while ((pag = xfs_perag_next(sc->mp, pag))) {
+ error = xrep_rtrefc_scan_ag(rr, pag);
+ if (error) {
+ xfs_perag_rele(pag);
+ return error;
+ }
+ }
+
+ xrep_rtgroup_btcur_init(sc, &sc->sr);
+
+ /*
+ * Set up a bag to store all the rmap records that we're tracking to
+ * generate a reference count record. If this exceeds
+ * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
+ */
+ error = rcbag_init(sc->mp, sc->xmbtp, &rcstack);
+ if (error)
+ goto out_cur;
+
+ /* Start the rtrmapbt cursor to the left of all records. */
+ error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
+ if (error)
+ goto out_bag;
+
+ /* Process reverse mappings into refcount data. */
+ while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
+ struct xfs_rmap_irec rmap;
+
+ /* Push all rmaps with pblk == sbno onto the stack */
+ error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
+ if (error)
+ goto out_bag;
+ if (!have)
+ break;
+ sbno = cbno = rmap.rm_startblock;
+ error = xrep_rtrefc_push_rmaps_at(rr, rcstack, sbno, &rmap,
+ &have);
+ if (error)
+ goto out_bag;
+
+ /* Set nbno to the bno of the next refcount change */
+ error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
+ if (error)
+ goto out_bag;
+
+ ASSERT(nbno > sbno);
+ old_stack_height = rcbag_count(rcstack);
+
+ /* While stack isn't empty... */
+ while (rcbag_count(rcstack) > 0) {
+ /* Pop all rmaps that end at nbno */
+ error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
+ if (error)
+ goto out_bag;
+
+ /* Push array items that start at nbno */
+ error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
+ if (error)
+ goto out_bag;
+ if (have) {
+ error = xrep_rtrefc_push_rmaps_at(rr, rcstack,
+ nbno, &rmap, &have);
+ if (error)
+ goto out_bag;
+ }
+
+ /* Emit refcount if necessary */
+ ASSERT(nbno > cbno);
+ if (rcbag_count(rcstack) != old_stack_height) {
+ if (old_stack_height > 1) {
+ error = xrep_rtrefc_stash(rr,
+ XFS_REFC_DOMAIN_SHARED,
+ cbno, nbno - cbno,
+ old_stack_height);
+ if (error)
+ goto out_bag;
+ }
+ cbno = nbno;
+ }
+
+ /* Stack empty, go find the next rmap */
+ if (rcbag_count(rcstack) == 0)
+ break;
+ old_stack_height = rcbag_count(rcstack);
+ sbno = nbno;
+
+ /* Set nbno to the bno of the next refcount change */
+ error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
+ &nbno);
+ if (error)
+ goto out_bag;
+
+ ASSERT(nbno > sbno);
+ }
+ }
+
+ ASSERT(rcbag_count(rcstack) == 0);
+out_bag:
+ rcbag_free(&rcstack);
+out_cur:
+ xchk_rtgroup_btcur_free(&sc->sr);
+ return error;
+}
+
+/* Retrieve refcountbt data for bulk load. */
+STATIC int
+xrep_rtrefc_get_records(
+ struct xfs_btree_cur *cur,
+ unsigned int idx,
+ struct xfs_btree_block *block,
+ unsigned int nr_wanted,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+ union xfs_btree_rec *block_rec;
+ unsigned int loaded;
+ int error;
+
+ for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+ error = xfarray_load(rr->refcount_records, rr->array_cur++,
+ &cur->bc_rec.rc);
+ if (error)
+ return error;
+
+ block_rec = xfs_btree_rec_addr(cur, idx, block);
+ cur->bc_ops->init_rec_from_cur(cur, block_rec);
+ }
+
+ return loaded;
+}
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_rtrefc_claim_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_rtrefc *rr = priv;
+
+ return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_rtrefc_iroot_size(
+ struct xfs_btree_cur *cur,
+ unsigned int level,
+ unsigned int nr_this_level,
+ void *priv)
+{
+ return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
+ nr_this_level);
+}
+
+/*
+ * Use the collected refcount information to stage a new rt refcount btree. If
+ * this is successful we'll return with the new btree root information logged
+ * to the repair transaction but not yet committed.
+ */
+STATIC int
+xrep_rtrefc_build_new_tree(
+ struct xrep_rtrefc *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_btree_cur *refc_cur;
+ int error;
+
+ error = xrep_rtrefc_sort_records(rr);
+ if (error)
+ return error;
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the realtime refcount inode.
+ */
+ error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
+ if (error)
+ return error;
+
+ rr->new_btree.bload.get_records = xrep_rtrefc_get_records;
+ rr->new_btree.bload.claim_block = xrep_rtrefc_claim_block;
+ rr->new_btree.bload.iroot_size = xrep_rtrefc_iroot_size;
+
+ refc_cur = xfs_rtrefcountbt_init_cursor(NULL, rtg);
+ xfs_btree_stage_ifakeroot(refc_cur, &rr->new_btree.ifake);
+
+ /* Compute how many blocks we'll need. */
+ error = xfs_btree_bload_compute_geometry(refc_cur, &rr->new_btree.bload,
+ xfarray_length(rr->refcount_records));
+ if (error)
+ goto err_cur;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ goto err_cur;
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an entire
+ * rtrefcountbt from the number of extents we found, and pump up our
+ * transaction to have sufficient block reservation. We're allowed
+ * to exceed quota to repair inconsistent metadata, though this is
+ * unlikely.
+ */
+ error = xfs_trans_reserve_more_inode(sc->tp, rtg_refcount(rtg),
+ rr->new_btree.bload.nr_blocks, 0, true);
+ if (error)
+ goto err_cur;
+
+ /* Reserve the space we'll need for the new btree. */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree,
+ rr->new_btree.bload.nr_blocks);
+ if (error)
+ goto err_cur;
+
+ /* Add all observed refcount records. */
+ rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
+ rr->array_cur = XFARRAY_CURSOR_INIT;
+ error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Install the new rtrefc btree in the inode. After this point the old
+ * btree is no longer accessible, the new tree is live, and we can
+ * delete the cursor.
+ */
+ xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
+ xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
+ xfs_btree_del_cursor(refc_cur, 0);
+
+ /* Dispose of any unused blocks and the accounting information. */
+ error = xrep_newbt_commit(&rr->new_btree);
+ if (error)
+ return error;
+
+ return xrep_roll_trans(sc);
+err_cur:
+ xfs_btree_del_cursor(refc_cur, error);
+ xrep_newbt_cancel(&rr->new_btree);
+ return error;
+}
+
+/* Rebuild the rt refcount btree. */
+int
+xrep_rtrefcountbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrefc *rr;
+ struct xfs_mount *mp = sc->mp;
+ char *descr;
+ int error;
+
+ /* We require the realtime rmapbt to rebuild anything. */
+ if (!xfs_has_rtrmapbt(mp))
+ return -EOPNOTSUPP;
+
+ /* Make sure any problems with the fork are fixed. */
+ error = xrep_metadata_inode_forks(sc);
+ if (error)
+ return error;
+
+ rr = kzalloc(sizeof(struct xrep_rtrefc), XCHK_GFP_FLAGS);
+ if (!rr)
+ return -ENOMEM;
+ rr->sc = sc;
+
+ /* Set up enough storage to handle one refcount record per rt extent. */
+ descr = xchk_xfile_ag_descr(sc, "reference count records");
+ error = xfarray_create(descr, mp->m_sb.sb_rextents,
+ sizeof(struct xfs_refcount_irec),
+ &rr->refcount_records);
+ kfree(descr);
+ if (error)
+ goto out_rr;
+
+ /* Collect all reference counts. */
+ xfsb_bitmap_init(&rr->old_rtrefcountbt_blocks);
+ error = xrep_rtrefc_find_refcounts(rr);
+ if (error)
+ goto out_bitmap;
+
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* Rebuild the refcount information. */
+ error = xrep_rtrefc_build_new_tree(rr);
+ if (error)
+ goto out_bitmap;
+
+ /*
+ * Free all the extents that were allocated to the former rtrefcountbt
+ * and aren't cross-linked with something else.
+ */
+ error = xrep_reap_metadir_fsblocks(rr->sc,
+ &rr->old_rtrefcountbt_blocks);
+
+out_bitmap:
+ xfsb_bitmap_destroy(&rr->old_rtrefcountbt_blocks);
+ xfarray_destroy(rr->refcount_records);
+out_rr:
+ kfree(rr);
+ return error;
+}
diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c
new file mode 100644
index 000000000000..12989fe80e8b
--- /dev/null
+++ b/fs/xfs/scrub/rtrmap.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_rtalloc.h"
+#include "xfs_rtgroup.h"
+#include "xfs_metafile.h"
+#include "xfs_refcount.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/* Set us up with the realtime metadata locked. */
+int
+xchk_setup_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
+ if (xchk_could_repair(sc)) {
+ error = xrep_setup_rtrmapbt(sc);
+ if (error)
+ return error;
+ }
+
+ error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
+ if (error)
+ return error;
+
+ error = xchk_setup_rt(sc);
+ if (error)
+ return error;
+
+ error = xchk_install_live_inode(sc, rtg_rmap(sc->sr.rtg));
+ if (error)
+ return error;
+
+ return xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+}
+
+/* Realtime reverse mapping. */
+
+struct xchk_rtrmap {
+ /*
+ * The furthest-reaching of the rmapbt records that we've already
+ * processed. This enables us to detect overlapping records for space
+ * allocations that cannot be shared.
+ */
+ struct xfs_rmap_irec overlap_rec;
+
+ /*
+ * The previous rmapbt record, so that we can check for two records
+ * that could be one.
+ */
+ struct xfs_rmap_irec prev_rec;
+};
+
+static inline bool
+xchk_rtrmapbt_is_shareable(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *irec)
+{
+ if (!xfs_has_rtreflink(sc->mp))
+ return false;
+ if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
+ return false;
+ return true;
+}
+
+/* Flag failures for records that overlap but cannot. */
+STATIC void
+xchk_rtrmapbt_check_overlapping(
+ struct xchk_btree *bs,
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *irec)
+{
+ xfs_rtblock_t pnext, inext;
+
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ /* No previous record? */
+ if (cr->overlap_rec.rm_blockcount == 0)
+ goto set_prev;
+
+ /* Do overlap_rec and irec overlap? */
+ pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
+ if (pnext <= irec->rm_startblock)
+ goto set_prev;
+
+ /* Overlap is only allowed if both records are data fork mappings. */
+ if (!xchk_rtrmapbt_is_shareable(bs->sc, &cr->overlap_rec) ||
+ !xchk_rtrmapbt_is_shareable(bs->sc, irec))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ /* Save whichever rmap record extends furthest. */
+ inext = irec->rm_startblock + irec->rm_blockcount;
+ if (pnext > inext)
+ return;
+
+set_prev:
+ memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
+}
+
+/* Decide if two reverse-mapping records can be merged. */
+static inline bool
+xchk_rtrmap_mergeable(
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *r2)
+{
+ const struct xfs_rmap_irec *r1 = &cr->prev_rec;
+
+ /* Ignore if prev_rec is not yet initialized. */
+ if (cr->prev_rec.rm_blockcount == 0)
+ return false;
+
+ if (r1->rm_owner != r2->rm_owner)
+ return false;
+ if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
+ return false;
+ if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
+ XFS_RMAP_LEN_MAX)
+ return false;
+ if (r1->rm_flags != r2->rm_flags)
+ return false;
+ return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
+}
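+
+/*
+ * For example, data fork rmaps (startblock 10, blockcount 5, offset 100)
+ * and (startblock 15, blockcount 3, offset 105) with the same owner and
+ * flags should have been written as one record (10, 8, 100), so finding
+ * both is flagged as corruption.
+ */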
+
+/* Flag failures for records that could be merged. */
+STATIC void
+xchk_rtrmapbt_check_mergeable(
+ struct xchk_btree *bs,
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *irec)
+{
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ if (xchk_rtrmap_mergeable(cr, irec))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
+}
+
+/* Cross-reference a rmap against the refcount btree. */
+STATIC void
+xchk_rtrmapbt_xref_rtrefc(
+ struct xfs_scrub *sc,
+ struct xfs_rmap_irec *irec)
+{
+ xfs_rgblock_t fbno;
+ xfs_extlen_t flen;
+ bool is_inode;
+ bool is_bmbt;
+ bool is_attr;
+ bool is_unwritten;
+ int error;
+
+ if (!sc->sr.refc_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ is_inode = !XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
+ is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
+ is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
+ is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
+
+ /* If this is shared, must be a data fork extent. */
+ error = xfs_refcount_find_shared(sc->sr.refc_cur, irec->rm_startblock,
+ irec->rm_blockcount, &fbno, &flen, false);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.refc_cur))
+ return;
+ if (flen != 0 && (!is_inode || is_attr || is_bmbt || is_unwritten))
+ xchk_btree_xref_set_corrupt(sc, sc->sr.refc_cur, 0);
+}
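+
+/*
+ * To restate the check above: a nonzero flen means that some part of the
+ * extent is shared, and only written data fork mappings of regular files may
+ * be shared, so a shared extent whose rmap record carries a non-inode owner
+ * or the bmbt, attr fork, or unwritten flags is cross-linked metadata.
+ */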
+
+/* Cross-reference with other metadata. */
+STATIC void
+xchk_rtrmapbt_xref(
+ struct xfs_scrub *sc,
+ struct xfs_rmap_irec *irec)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xchk_xref_is_used_rt_space(sc,
+ xfs_rgbno_to_rtb(sc->sr.rtg, irec->rm_startblock),
+ irec->rm_blockcount);
+ if (irec->rm_owner == XFS_RMAP_OWN_COW)
+ xchk_xref_is_cow_staging(sc, irec->rm_startblock,
+ irec->rm_blockcount);
+ else
+ xchk_rtrmapbt_xref_rtrefc(sc, irec);
+}
+
+/* Scrub a realtime rmapbt record. */
+STATIC int
+xchk_rtrmapbt_rec(
+ struct xchk_btree *bs,
+ const union xfs_btree_rec *rec)
+{
+ struct xchk_rtrmap *cr = bs->private;
+ struct xfs_rmap_irec irec;
+
+ if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
+ xfs_rtrmap_check_irec(to_rtg(bs->cur->bc_group), &irec) != NULL) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return 0;
+ }
+
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
+ xchk_rtrmapbt_check_mergeable(bs, cr, &irec);
+ xchk_rtrmapbt_check_overlapping(bs, cr, &irec);
+ xchk_rtrmapbt_xref(bs->sc, &irec);
+ return 0;
+}
+
+/* Scrub the realtime rmap btree. */
+int
+xchk_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xfs_inode *ip = rtg_rmap(sc->sr.rtg);
+ struct xfs_owner_info oinfo;
+ struct xchk_rtrmap cr = { };
+ int error;
+
+ error = xchk_metadata_inode_forks(sc);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, XFS_DATA_FORK);
+ return xchk_btree(sc, sc->sr.rmap_cur, xchk_rtrmapbt_rec, &oinfo, &cr);
+}
+
+/* xref check that the extent has no realtime reverse mapping at all */
+void
+xchk_xref_has_no_rt_owner(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_has_records(sc->sr.rmap_cur, bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
+
+/* xref check that the extent is completely mapped */
+void
+xchk_xref_has_rt_owner(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_has_records(sc->sr.rmap_cur, bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_FULL)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
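+
+/*
+ * xfs_rmap_has_records() classifies the range as EMPTY (no rmap record
+ * touches it), FULL (records cover every block of it), or SPARSE (partial
+ * coverage); a SPARSE result therefore fails both of the checks above.
+ */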
+
+/* xref check that the extent is only owned by a given owner */
+void
+xchk_xref_is_only_rt_owned_by(
+ struct xfs_scrub *sc,
+	xfs_rgblock_t			bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xfs_rmap_matches res;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_count_owners(sc->sr.rmap_cur, bno, len, oinfo, &res);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (res.matches != 1)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+ if (res.bad_non_owner_matches)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+ if (res.non_owner_matches)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
new file mode 100644
index 000000000000..7561941a337a
--- /dev/null
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_btree_staging.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
+#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
+#include "xfs_refcount.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/fsb_bitmap.h"
+#include "scrub/rgb_bitmap.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/newbt.h"
+#include "scrub/reap.h"
+
+/*
+ * Realtime Reverse Mapping Btree Repair
+ * =====================================
+ *
+ * This isn't quite as difficult as repairing the rmap btree on the data
+ * device, since we only store the data fork extents of realtime files on the
+ * realtime device. We still have to freeze the filesystem and stop the
+ * background threads like we do for the rmap repair, but we only have to scan
+ * realtime inodes.
+ *
+ * Collecting entries for the new realtime rmap btree is easy -- all we have
+ * to do is generate rtrmap entries from the data fork mappings of all realtime
+ * files in the filesystem. We then scan the rmap btrees of the data device
+ * for extents belonging to the old btree, noting them in a bitmap.
+ *
+ * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
+ * a new btree cursor and atomically swap that into the realtime inode. Then
+ * we can free the blocks from the old btree.
+ *
+ * We use the 'xrep_rtrmap' prefix for all the rmap functions.
+ */
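+
+/*
+ * In rough pseudocode, using the function names defined later in this file:
+ *
+ *	xrep_rtrmap_setup_scan()	-- xfbtree, inode scan, live hooks
+ *	xrep_rtrmap_find_rmaps()	-- stash rmaps for rt files and CoW
+ *	xrep_rtrmap_build_new_tree()	-- bulk-load and commit the new btree
+ *	xrep_reap_metadir_fsblocks()	-- free the old btree's blocks
+ *	xrep_rtrmap_teardown()
+ */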
+
+/* Context for collecting rmaps */
+struct xrep_rtrmap {
+ /* new rtrmapbt information */
+ struct xrep_newbt new_btree;
+
+ /* lock for the xfbtree and xfile */
+ struct mutex lock;
+
+ /* rmap records generated from primary metadata */
+ struct xfbtree rtrmap_btree;
+
+ struct xfs_scrub *sc;
+
+ /* bitmap of old rtrmapbt blocks */
+ struct xfsb_bitmap old_rtrmapbt_blocks;
+
+ /* Hooks into rtrmap update code. */
+ struct xfs_rmap_hook rhook;
+
+ /* inode scan cursor */
+ struct xchk_iscan iscan;
+
+ /* in-memory btree cursor for the ->get_blocks walk */
+ struct xfs_btree_cur *mcur;
+
+ /* Number of records we're staging in the new btree. */
+ uint64_t nr_records;
+};
+
+/* Set us up to repair rt reverse mapping btrees. */
+int
+xrep_setup_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrmap *rr;
+ char *descr;
+ int error;
+
+ xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
+
+ descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
+ error = xrep_setup_xfbtree(sc, descr);
+ kfree(descr);
+ if (error)
+ return error;
+
+ rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->sc = sc;
+ sc->buf = rr;
+ return 0;
+}
+
+/* Make sure there's nothing funny about this mapping. */
+STATIC int
+xrep_rtrmap_check_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ return xrep_require_rtext_inuse(sc, rec->rm_startblock,
+ rec->rm_blockcount);
+}
+
+/* Store a reverse-mapping record. */
+static inline int
+xrep_rtrmap_stash(
+ struct xrep_rtrmap *rr,
+ xfs_rgblock_t startblock,
+ xfs_extlen_t blockcount,
+ uint64_t owner,
+ uint64_t offset,
+ unsigned int flags)
+{
+ struct xfs_rmap_irec rmap = {
+ .rm_startblock = startblock,
+ .rm_blockcount = blockcount,
+ .rm_owner = owner,
+ .rm_offset = offset,
+ .rm_flags = flags,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *mcur;
+ int error = 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ trace_xrep_rtrmap_found(sc->mp, &rmap);
+
+ /* Add entry to in-memory btree. */
+ mutex_lock(&rr->lock);
+ mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
+ error = xfs_rmap_map_raw(mcur, &rmap);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
+ if (error)
+ goto out_abort;
+
+ mutex_unlock(&rr->lock);
+ return 0;
+
+out_cancel:
+ xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
+ return error;
+}
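+
+/*
+ * Note that each stashed record is committed to the in-memory btree right
+ * away: the xfbtree buffers are backed by an xfile rather than the data
+ * device, so xfbtree_trans_commit() merely writes the dirty buffers back to
+ * the xfile without logging anything, and xfbtree_trans_cancel() throws away
+ * the partial update if the raw map call failed.
+ */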
+
+/* Finding all file and bmbt extents. */
+
+/* Context for accumulating rmaps for an inode fork. */
+struct xrep_rtrmap_ifork {
+ /*
+ * Accumulate rmap data here to turn multiple adjacent bmaps into a
+ * single rmap.
+ */
+ struct xfs_rmap_irec accum;
+
+ struct xrep_rtrmap *rr;
+};
+
+/* Stash an rmap that we accumulated while walking an inode fork. */
+STATIC int
+xrep_rtrmap_stash_accumulated(
+ struct xrep_rtrmap_ifork *rf)
+{
+ if (rf->accum.rm_blockcount == 0)
+ return 0;
+
+ return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
+ rf->accum.rm_blockcount, rf->accum.rm_owner,
+ rf->accum.rm_offset, rf->accum.rm_flags);
+}
+
+/* Accumulate a bmbt record. */
+STATIC int
+xrep_rtrmap_visit_bmbt(
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap_ifork *rf = priv;
+ struct xfs_rmap_irec *accum = &rf->accum;
+ struct xfs_mount *mp = rf->rr->sc->mp;
+ xfs_rgblock_t rgbno;
+ unsigned int rmap_flags = 0;
+ int error;
+
+ if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
+ rtg_rgno(rf->rr->sc->sr.rtg))
+ return 0;
+
+ if (rec->br_state == XFS_EXT_UNWRITTEN)
+ rmap_flags |= XFS_RMAP_UNWRITTEN;
+
+ /* If this bmap is adjacent to the previous one, just add it. */
+ rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
+ if (accum->rm_blockcount > 0 &&
+ rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
+ rgbno == accum->rm_startblock + accum->rm_blockcount &&
+ rmap_flags == accum->rm_flags) {
+ accum->rm_blockcount += rec->br_blockcount;
+ return 0;
+ }
+
+ /* Otherwise stash the old rmap and start accumulating a new one. */
+ error = xrep_rtrmap_stash_accumulated(rf);
+ if (error)
+ return error;
+
+ accum->rm_startblock = rgbno;
+ accum->rm_blockcount = rec->br_blockcount;
+ accum->rm_offset = rec->br_startoff;
+ accum->rm_flags = rmap_flags;
+ return 0;
+}
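+
+/*
+ * Example of the accumulation above: mappings (startoff 0, rgbno 100, len 8)
+ * and (startoff 8, rgbno 108, len 4) with equal flags are contiguous in both
+ * file and rtgroup space, so they collapse into a single accumulated rmap
+ * (rgbno 100, len 12), which is stashed only once a discontiguous mapping or
+ * the end of the fork scan is reached.
+ */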
+
+/*
+ * Iterate the block mapping btree to collect rmap records for anything in this
+ * fork that maps to the rt volume. Sets @mappings_done to true if we've
+ * scanned the block mappings in this fork.
+ */
+STATIC int
+xrep_rtrmap_scan_bmbt(
+ struct xrep_rtrmap_ifork *rf,
+ struct xfs_inode *ip,
+ bool *mappings_done)
+{
+ struct xrep_rtrmap *rr = rf->rr;
+ struct xfs_btree_cur *cur;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ int error = 0;
+
+ *mappings_done = false;
+
+ /*
+ * If the incore extent cache is already loaded, we'll just use the
+ * incore extent scanner to record mappings. Don't bother walking the
+ * ondisk extent tree.
+ */
+ if (!xfs_need_iread_extents(ifp))
+ return 0;
+
+ /* Accumulate all the mappings in the bmap btree. */
+ cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
+ error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
+ xfs_btree_del_cursor(cur, error);
+ if (error)
+ return error;
+
+ /* Stash any remaining accumulated rmaps and exit. */
+ *mappings_done = true;
+ return xrep_rtrmap_stash_accumulated(rf);
+}
+
+/*
+ * Iterate the incore extent cache to collect rmap records for anything in
+ * this fork that maps to this realtime group.
+ */
+STATIC int
+xrep_rtrmap_scan_iext(
+ struct xrep_rtrmap_ifork *rf,
+ struct xfs_ifork *ifp)
+{
+ struct xfs_bmbt_irec rec;
+ struct xfs_iext_cursor icur;
+ int error;
+
+ for_each_xfs_iext(ifp, &icur, &rec) {
+ if (isnullstartblock(rec.br_startblock))
+ continue;
+ error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
+ if (error)
+ return error;
+ }
+
+ return xrep_rtrmap_stash_accumulated(rf);
+}
+
+/* Find all the extents on the realtime device mapped by an inode fork. */
+STATIC int
+xrep_rtrmap_scan_dfork(
+ struct xrep_rtrmap *rr,
+ struct xfs_inode *ip)
+{
+ struct xrep_rtrmap_ifork rf = {
+ .accum = { .rm_owner = ip->i_ino, },
+ .rr = rr,
+ };
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ int error = 0;
+
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ bool mappings_done;
+
+ /*
+ * Scan the bmbt for mappings. If the incore extent tree is
+ * loaded, we want to scan the cached mappings since that's
+ * faster when the extent counts are very high.
+ */
+ error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
+ if (error || mappings_done)
+ return error;
+ } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
+ /* realtime data forks should only be extents or btree */
+ return -EFSCORRUPTED;
+ }
+
+ /* Scan incore extent cache. */
+ return xrep_rtrmap_scan_iext(&rf, ifp);
+}
+
+/* Record reverse mappings for a file. */
+STATIC int
+xrep_rtrmap_scan_inode(
+ struct xrep_rtrmap *rr,
+ struct xfs_inode *ip)
+{
+ unsigned int lock_mode;
+ int error = 0;
+
+ /* Skip the rt rmap btree inode. */
+ if (rr->sc->ip == ip)
+ return 0;
+
+ lock_mode = xfs_ilock_data_map_shared(ip);
+
+ /* Check the data fork if it's on the realtime device. */
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ error = xrep_rtrmap_scan_dfork(rr, ip);
+ if (error)
+ goto out_unlock;
+ }
+
+ xchk_iscan_mark_visited(&rr->iscan, ip);
+out_unlock:
+ xfs_iunlock(ip, lock_mode);
+ return error;
+}
+
+/* Record extents that belong to the realtime rmap inode. */
+STATIC int
+xrep_rtrmap_walk_rmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+	/* Skip extents that are not owned by the rtrmap btree inode. */
+ if (rec->rm_owner != rr->sc->ip->i_ino)
+ return 0;
+
+ error = xrep_check_ino_btree_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
+ xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
+ rec->rm_blockcount);
+}
+
+/* Scan one AG for reverse mappings for the realtime rmap btree. */
+STATIC int
+xrep_rtrmap_scan_ag(
+ struct xrep_rtrmap *rr,
+ struct xfs_perag *pag)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ error = xrep_ag_init(sc, pag, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
+struct xrep_rtrmap_stash_run {
+ struct xrep_rtrmap *rr;
+ uint64_t owner;
+};
+
+static int
+xrep_rtrmap_stash_run(
+ uint32_t start,
+ uint32_t len,
+ void *priv)
+{
+ struct xrep_rtrmap_stash_run *rsr = priv;
+ struct xrep_rtrmap *rr = rsr->rr;
+ xfs_rgblock_t rgbno = start;
+
+ return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
+}
+
+/*
+ * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
+ * that the ranges are in units of rtgroup blocks.
+ */
+STATIC int
+xrep_rtrmap_stash_bitmap(
+ struct xrep_rtrmap *rr,
+ struct xrgb_bitmap *bitmap,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xrep_rtrmap_stash_run rsr = {
+ .rr = rr,
+ .owner = oinfo->oi_owner,
+ };
+
+ return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
+}
+
+/* Record a CoW staging extent. */
+STATIC int
+xrep_rtrmap_walk_cowblocks(
+ struct xfs_btree_cur *cur,
+ const struct xfs_refcount_irec *irec,
+ void *priv)
+{
+ struct xrgb_bitmap *bitmap = priv;
+
+ if (!xfs_refcount_check_domain(irec) ||
+ irec->rc_domain != XFS_REFC_DOMAIN_COW)
+ return -EFSCORRUPTED;
+
+ return xrgb_bitmap_set(bitmap, irec->rc_startblock,
+ irec->rc_blockcount);
+}
+
+/*
+ * Collect rmaps for the blocks containing the refcount btree, and all CoW
+ * staging extents.
+ */
+STATIC int
+xrep_rtrmap_find_refcount_rmaps(
+ struct xrep_rtrmap *rr)
+{
+ struct xrgb_bitmap cow_blocks; /* COWBIT */
+ struct xfs_refcount_irec low = {
+ .rc_startblock = 0,
+ .rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ struct xfs_refcount_irec high = {
+ .rc_startblock = -1U,
+ .rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ if (!xfs_has_rtreflink(sc->mp))
+ return 0;
+
+ xrgb_bitmap_init(&cow_blocks);
+
+ /* Collect rmaps for CoW staging extents. */
+ error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
+ xrep_rtrmap_walk_cowblocks, &cow_blocks);
+ if (error)
+ goto out_bitmap;
+
+ /* Generate rmaps for everything. */
+ error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
+
+out_bitmap:
+ xrgb_bitmap_destroy(&cow_blocks);
+ return error;
+}
+
+/* Count and check all collected records. */
+STATIC int
+xrep_rtrmap_check_record(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ int error;
+
+ error = xrep_rtrmap_check_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ rr->nr_records++;
+ return 0;
+}
+
+/* Generate all the reverse-mappings for the realtime device. */
+STATIC int
+xrep_rtrmap_find_rmaps(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_perag *pag = NULL;
+ struct xfs_inode *ip;
+ struct xfs_btree_cur *mcur;
+ int error;
+
+ /* Generate rmaps for the realtime superblock */
+ if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
+ error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
+ XFS_RMAP_OWN_FS, 0, 0);
+ if (error)
+ return error;
+ }
+
+ /* Find CoW staging extents. */
+ xrep_rtgroup_btcur_init(sc, &sc->sr);
+ error = xrep_rtrmap_find_refcount_rmaps(rr);
+ xchk_rtgroup_btcur_free(&sc->sr);
+ if (error)
+ return error;
+
+ /*
+ * Set up for a potentially lengthy filesystem scan by reducing our
+ * transaction resource usage for the duration. Specifically:
+ *
+ * Unlock the realtime metadata inodes and cancel the transaction to
+ * release the log grant space while we scan the filesystem.
+ *
+ * Create a new empty transaction to eliminate the possibility of the
+ * inode scan deadlocking on cyclical metadata.
+ *
+ * We pass the empty transaction to the file scanning function to avoid
+ * repeatedly cycling empty transactions. This can be done even though
+ * we take the IOLOCK to quiesce the file because empty transactions
+ * do not take sb_internal.
+ */
+ xchk_trans_cancel(sc);
+ xchk_rtgroup_unlock(&sc->sr);
+ xchk_trans_alloc_empty(sc);
+
+ while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
+ error = xrep_rtrmap_scan_inode(rr, ip);
+ xchk_irele(sc, ip);
+ if (error)
+ break;
+
+ if (xchk_should_terminate(sc, &error))
+ break;
+ }
+ xchk_iscan_iter_finish(&rr->iscan);
+ if (error)
+ return error;
+
+ /*
+ * Switch out for a real transaction and lock the RT metadata in
+ * preparation for building a new tree.
+ */
+ xchk_trans_cancel(sc);
+ error = xchk_setup_rt(sc);
+ if (error)
+ return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+ if (error)
+ return error;
+
+ /*
+ * If a hook failed to update the in-memory btree, we lack the data to
+ * continue the repair.
+ */
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ /* Scan for old rtrmap blocks. */
+ while ((pag = xfs_perag_next(sc->mp, pag))) {
+ error = xrep_rtrmap_scan_ag(rr, pag);
+ if (error) {
+ xfs_perag_rele(pag);
+ return error;
+ }
+ }
+
+ /*
+ * Now that we have everything locked again, we need to count the
+ * number of rmap records stashed in the btree. This should reflect
+ * all actively-owned rt files in the filesystem. At the same time,
+ * check all our records before we start building a new btree, which
+ * requires the rtbitmap lock.
+ */
+ mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
+ rr->nr_records = 0;
+ error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
+ xfs_btree_del_cursor(mcur, error);
+
+ return error;
+}
+
+/* Building the new rtrmap btree. */
+
+/* Retrieve rtrmapbt data for bulk load. */
+STATIC int
+xrep_rtrmap_get_records(
+ struct xfs_btree_cur *cur,
+ unsigned int idx,
+ struct xfs_btree_block *block,
+ unsigned int nr_wanted,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ union xfs_btree_rec *block_rec;
+ unsigned int loaded;
+ int error;
+
+ for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+ int stat = 0;
+
+ error = xfs_btree_increment(rr->mcur, 0, &stat);
+ if (error)
+ return error;
+ if (!stat)
+ return -EFSCORRUPTED;
+
+ error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
+ if (error)
+ return error;
+ if (!stat)
+ return -EFSCORRUPTED;
+
+ block_rec = xfs_btree_rec_addr(cur, idx, block);
+ cur->bc_ops->init_rec_from_cur(cur, block_rec);
+ }
+
+ return loaded;
+}
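+
+/*
+ * The bulk loader calls the function above once per new btree block, asking
+ * for up to nr_wanted records each time; rr->mcur lives in the repair
+ * context so that the walk of the in-memory btree resumes exactly where the
+ * previous block's fill stopped.
+ */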
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_rtrmap_claim_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+
+ return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_rtrmap_iroot_size(
+ struct xfs_btree_cur *cur,
+ unsigned int level,
+ unsigned int nr_this_level,
+ void *priv)
+{
+ return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
+}
+
+/*
+ * Use the collected rmap information to stage a new realtime rmap btree. If
+ * this is successful we'll return with the new btree root information logged
+ * to the repair transaction but not yet committed.
+ */
+STATIC int
+xrep_rtrmap_build_new_tree(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_btree_cur *rmap_cur;
+ int error;
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the realtime rmapbt inode.
+ */
+ error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
+ if (error)
+ return error;
+
+ rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
+ rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
+ rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;
+
+ rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
+ xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);
+
+ /* Compute how many blocks we'll need for the rmaps collected. */
+ error = xfs_btree_bload_compute_geometry(rmap_cur,
+ &rr->new_btree.bload, rr->nr_records);
+ if (error)
+ goto err_cur;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ goto err_cur;
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an entire
+ * rtrmapbt from the number of extents we found, and pump up our
+ * transaction to have sufficient block reservation. We're allowed
+ * to exceed quota to repair inconsistent metadata, though this is
+ * unlikely.
+ */
+ error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
+ rr->new_btree.bload.nr_blocks, 0, true);
+ if (error)
+ goto err_cur;
+
+ /* Reserve the space we'll need for the new btree. */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree,
+ rr->new_btree.bload.nr_blocks);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Create a cursor to the in-memory btree so that we can bulk load the
+ * new btree.
+ */
+ rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
+ error = xfs_btree_goto_left_edge(rr->mcur);
+ if (error)
+ goto err_mcur;
+
+ /* Add all observed rmap records. */
+ rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
+ error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
+ if (error)
+ goto err_mcur;
+
+ /*
+ * Install the new rtrmap btree in the inode. After this point the old
+ * btree is no longer accessible, the new tree is live, and we can
+ * delete the cursor.
+ */
+ xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
+ xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
+ xfs_btree_del_cursor(rmap_cur, 0);
+ xfs_btree_del_cursor(rr->mcur, 0);
+ rr->mcur = NULL;
+
+ /*
+ * Now that we've written the new btree to disk, we don't need to keep
+ * updating the in-memory btree. Abort the scan to stop live updates.
+ */
+ xchk_iscan_abort(&rr->iscan);
+
+ /* Dispose of any unused blocks and the accounting information. */
+ error = xrep_newbt_commit(&rr->new_btree);
+ if (error)
+ return error;
+
+ return xrep_roll_trans(sc);
+
+err_mcur:
+ xfs_btree_del_cursor(rr->mcur, error);
+err_cur:
+ xfs_btree_del_cursor(rmap_cur, error);
+ xrep_newbt_cancel(&rr->new_btree);
+ return error;
+}
+
+/* Reaping the old btree. */
+
+static inline bool
+xrep_rtrmapbt_want_live_update(
+ struct xchk_iscan *iscan,
+ const struct xfs_owner_info *oi)
+{
+ if (xchk_iscan_aborted(iscan))
+ return false;
+
+ /*
+ * We scanned the CoW staging extents before we started the iscan, so
+ * we need all the updates.
+ */
+ if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
+ return true;
+
+ /* Ignore updates to files that the scanner hasn't visited yet. */
+ return xchk_iscan_want_live_update(iscan, oi->oi_owner);
+}
+
+/*
+ * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
+ * We're running from the thread that owns the rtrmap ILOCK and is generating
+ * the update, so we must be careful about which parts of struct xrep_rtrmap
+ * we change.
+ */
+static int
+xrep_rtrmapbt_live_update(
+ struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ struct xfs_rmap_update_params *p = data;
+ struct xrep_rtrmap *rr;
+ struct xfs_mount *mp;
+ struct xfs_btree_cur *mcur;
+ struct xfs_trans *tp;
+ int error;
+
+ rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
+ mp = rr->sc->mp;
+
+ if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
+ goto out_unlock;
+
+ trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);
+
+ tp = xfs_trans_alloc_empty(mp);
+
+ mutex_lock(&rr->lock);
+ mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
+ error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
+ p->blockcount, &p->oinfo, p->unwritten);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_cancel(tp);
+ mutex_unlock(&rr->lock);
+ return NOTIFY_DONE;
+
+out_cancel:
+ xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
+ xfs_trans_cancel(tp);
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
+out_unlock:
+ return NOTIFY_DONE;
+}
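+
+/*
+ * The notifier returns NOTIFY_DONE on every path, even on failure: a live
+ * update that cannot be applied to the shadow btree aborts the iscan (which
+ * scuttles this repair) rather than failing the filesystem operation that
+ * generated the update.
+ */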
+
+/* Set up the filesystem scan components. */
+STATIC int
+xrep_rtrmap_setup_scan(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ mutex_init(&rr->lock);
+ xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);
+
+ /* Set up some storage */
+ error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
+ rtg_rgno(sc->sr.rtg));
+ if (error)
+ goto out_bitmap;
+
+ /* Retry iget every tenth of a second for up to 30 seconds. */
+ xchk_iscan_start(sc, 30000, 100, &rr->iscan);
+
+ /*
+ * Hook into live rtrmap operations so that we can update our in-memory
+ * btree to reflect live changes on the filesystem. Since we drop the
+ * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
+ * installing a stale btree.
+ */
+ ASSERT(sc->flags & XCHK_FSGATES_RMAP);
+ xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
+ error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
+ if (error)
+ goto out_iscan;
+ return 0;
+
+out_iscan:
+ xchk_iscan_teardown(&rr->iscan);
+ xfbtree_destroy(&rr->rtrmap_btree);
+out_bitmap:
+ xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
+ mutex_destroy(&rr->lock);
+ return error;
+}
+
+/* Tear down scan components. */
+STATIC void
+xrep_rtrmap_teardown(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+
+ xchk_iscan_abort(&rr->iscan);
+ xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
+ xchk_iscan_teardown(&rr->iscan);
+ xfbtree_destroy(&rr->rtrmap_btree);
+ xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
+ mutex_destroy(&rr->lock);
+}
+
+/* Repair the realtime rmap btree. */
+int
+xrep_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrmap *rr = sc->buf;
+ int error;
+
+ /* Make sure any problems with the fork are fixed. */
+ error = xrep_metadata_inode_forks(sc);
+ if (error)
+ return error;
+
+ error = xrep_rtrmap_setup_scan(rr);
+ if (error)
+ return error;
+
+ /* Collect rmaps for realtime files. */
+ error = xrep_rtrmap_find_rmaps(rr);
+ if (error)
+ goto out_records;
+
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* Rebuild the rtrmap information. */
+ error = xrep_rtrmap_build_new_tree(rr);
+ if (error)
+ goto out_records;
+
+ /*
+ * Free all the extents that were allocated to the former rtrmapbt and
+ * aren't cross-linked with something else.
+ */
+ error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
+
+out_records:
+ xrep_rtrmap_teardown(rr);
+ return error;
+}
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 49fc6250bafc..4ac679c1bd29 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -81,8 +81,7 @@ xchk_setup_rtsummary(
if (error)
return error;
- error = xchk_install_live_inode(sc,
- sc->sr.rtg->rtg_inodes[XFS_RTGI_SUMMARY]);
+ error = xchk_install_live_inode(sc, rtg_summary(sc->sr.rtg));
if (error)
return error;
@@ -90,6 +89,10 @@ xchk_setup_rtsummary(
if (error)
return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP);
+ if (error)
+ return error;
+
/*
* Now that we've locked the rtbitmap and rtsummary, we can't race with
* growfsrt trying to expand the summary or change the size of the rt
@@ -100,7 +103,6 @@ xchk_setup_rtsummary(
* exclusively here. If we ever start caring about running concurrent
* fsmap with scrub this could be changed.
*/
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
rts->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
rts->rbmblocks = xfs_rtbitmap_blockcount(mp);
@@ -191,8 +193,7 @@ xchk_rtsum_record_free(
rtlen = xfs_rtxlen_to_extlen(mp, rec->ar_extcount);
if (!xfs_verify_rtbext(mp, rtbno, rtlen)) {
- xchk_ino_xref_set_corrupt(sc,
- rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_ino);
+ xchk_ino_xref_set_corrupt(sc, rtg_bitmap(rtg)->i_ino);
return -EFSCORRUPTED;
}
@@ -218,7 +219,7 @@ xchk_rtsum_compute(
/* If the bitmap size doesn't match the computed size, bail. */
if (XFS_FSB_TO_B(mp, xfs_rtbitmap_blockcount(mp)) !=
- rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_disk_size)
+ rtg_bitmap(rtg)->i_disk_size)
return -EFSCORRUPTED;
return xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtsum_record_free, sc);
@@ -310,8 +311,8 @@ xchk_rtsummary(
{
struct xfs_mount *mp = sc->mp;
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
- struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
+ struct xfs_inode *rsumip = rtg_summary(rtg);
struct xchk_rtsummary *rts = sc->buf;
int error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
index 8198ea84ad70..d593977d70df 100644
--- a/fs/xfs/scrub/rtsummary_repair.c
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -165,7 +165,8 @@ xrep_rtsummary(
* Now exchange the contents. Nothing in repair uses the temporary
* buffer, so we can reuse it for the tempfile exchrange information.
*/
- error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, &rts->tempexch);
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
+ rts->rsumblocks, &rts->tempexch);
if (error)
return error;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 950f5a58dcd9..3c3b0d25006f 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -149,6 +149,18 @@ xchk_probe(
if (xchk_should_terminate(sc, &error))
return error;
+ /*
+ * If the caller is probing to see if repair works but repair isn't
+ * built into the kernel, return EOPNOTSUPP because that's the signal
+ * that userspace expects. If online repair is built in, set the
+ * CORRUPT flag (without any of the usual tracing/logging) to force us
+ * into xrep_probe.
+ */
+ if (xchk_could_repair(sc)) {
+ if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR))
+ return -EOPNOTSUPP;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ }
return 0;
}
@@ -164,7 +176,7 @@ xchk_fsgates_disable(
trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
if (sc->flags & XCHK_FSGATES_DRAIN)
- xfs_drain_wait_disable();
+ xfs_defer_drain_wait_disable();
if (sc->flags & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_disable();
@@ -218,6 +230,8 @@ xchk_teardown(
int error)
{
xchk_ag_free(sc, &sc->sa);
+ xchk_rtgroup_btcur_free(&sc->sr);
+
if (sc->tp) {
if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
error = xfs_trans_commit(sc->tp);
@@ -385,12 +399,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
},
[XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
.type = ST_RTGROUP,
+ .has = xfs_has_nonzoned,
.setup = xchk_setup_rtbitmap,
.scrub = xchk_rtbitmap,
.repair = xrep_rtbitmap,
},
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
.type = ST_RTGROUP,
+ .has = xfs_has_nonzoned,
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.repair = xrep_rtsummary,
@@ -458,6 +474,20 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_has_rtsb,
.repair = xrep_rgsuperblock,
},
+ [XFS_SCRUB_TYPE_RTRMAPBT] = { /* realtime group rmapbt */
+ .type = ST_RTGROUP,
+ .setup = xchk_setup_rtrmapbt,
+ .scrub = xchk_rtrmapbt,
+ .has = xfs_has_rtrmapbt,
+ .repair = xrep_rtrmapbt,
+ },
+ [XFS_SCRUB_TYPE_RTREFCBT] = { /* realtime refcountbt */
+ .type = ST_RTGROUP,
+ .setup = xchk_setup_rtrefcountbt,
+ .scrub = xchk_rtrefcountbt,
+ .has = xfs_has_rtreflink,
+ .repair = xrep_rtrefcountbt,
+ },
};
static int
@@ -650,8 +680,6 @@ xfs_scrub_metadata(
if (error)
goto out;
- xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB);
-
sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
if (!sc) {
error = -ENOMEM;
@@ -848,10 +876,7 @@ xchk_scrubv_open_by_handle(
struct xfs_inode *ip;
int error;
- error = xfs_trans_alloc_empty(mp, &tp);
- if (error)
- return NULL;
-
+ tp = xfs_trans_alloc_empty(mp);
error = xfs_iget(mp, tp, head->svh_ino, XCHK_IGET_FLAGS, 0, &ip);
xfs_trans_cancel(tp);
if (error)
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index a7fda3e2b013..a3f1abc91390 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -96,7 +96,7 @@ struct xchk_meta_ops {
int (*repair_eval)(struct xfs_scrub *sc);
/* Decide if we even have this piece of metadata. */
- bool (*has)(struct xfs_mount *);
+ bool (*has)(const struct xfs_mount *);
/* type describing required/allowed inputs */
enum xchk_type type;
@@ -126,6 +126,10 @@ struct xchk_rt {
/* XFS_RTGLOCK_* lock state if locked */
unsigned int rtlock_flags;
+
+ /* rtgroup btrees */
+ struct xfs_btree_cur *rmap_cur;
+ struct xfs_btree_cur *refc_cur;
};
struct xfs_scrub {
@@ -184,6 +188,12 @@ struct xfs_scrub {
*/
unsigned int sick_mask;
+ /*
+ * Clear these XFS_SICK_* flags but only if the scan is ok. Useful for
+ * removing ZAPPED flags after a repair.
+ */
+ unsigned int healthy_mask;
+
/* next time we want to cond_resched() */
struct xchk_relax relax;
@@ -274,10 +284,14 @@ int xchk_metapath(struct xfs_scrub *sc);
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
int xchk_rgsuperblock(struct xfs_scrub *sc);
+int xchk_rtrmapbt(struct xfs_scrub *sc);
+int xchk_rtrefcountbt(struct xfs_scrub *sc);
#else
# define xchk_rtbitmap xchk_nothing
# define xchk_rtsummary xchk_nothing
# define xchk_rgsuperblock xchk_nothing
+# define xchk_rtrmapbt xchk_nothing
+# define xchk_rtrefcountbt xchk_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
@@ -311,8 +325,26 @@ void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
xfs_extlen_t len);
+void xchk_xref_has_no_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_has_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_is_only_rt_owned_by(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
+void xchk_xref_is_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_is_not_rt_shared(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_is_not_rt_cow_staging(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
#else
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+# define xchk_xref_has_no_rt_owner(sc, rtbno, len) do { } while (0)
+# define xchk_xref_has_rt_owner(sc, rtbno, len) do { } while (0)
+# define xchk_xref_is_only_rt_owned_by(sc, bno, len, oinfo) do { } while (0)
+# define xchk_xref_is_rt_cow_staging(sc, bno, len) do { } while (0)
+# define xchk_xref_is_not_rt_shared(sc, bno, len) do { } while (0)
+# define xchk_xref_is_not_rt_cow_staging(sc, bno, len) do { } while (0)
#endif
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index a476c7b2ab75..f8a37ea97791 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -82,6 +82,8 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_DIRTREE] = "dirtree",
[XFS_SCRUB_TYPE_METAPATH] = "metapath",
[XFS_SCRUB_TYPE_RGSUPER] = "rgsuper",
+ [XFS_SCRUB_TYPE_RTRMAPBT] = "rtrmapbt",
+ [XFS_SCRUB_TYPE_RTREFCBT] = "rtrefcountbt",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c
index d015a86ef460..953ce7be78dc 100644
--- a/fs/xfs/scrub/symlink_repair.c
+++ b/fs/xfs/scrub/symlink_repair.c
@@ -36,6 +36,7 @@
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
+#include "scrub/health.h"
/*
* Symbolic Link Repair
@@ -233,7 +234,7 @@ xrep_symlink_salvage(
* target zapped flag.
*/
if (buflen == 0) {
- sc->sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
+ xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_SYMLINK_ZAPPED);
sprintf(target_buf, DUMMY_TARGET);
}
diff --git a/fs/xfs/scrub/tempexch.h b/fs/xfs/scrub/tempexch.h
index 995ba187c5aa..eccda720c2ca 100644
--- a/fs/xfs/scrub/tempexch.h
+++ b/fs/xfs/scrub/tempexch.h
@@ -12,7 +12,7 @@ struct xrep_tempexch {
};
int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork,
- struct xrep_tempexch *ti);
+ xfs_fileoff_t off, xfs_filblks_t len, struct xrep_tempexch *ti);
int xrep_tempexch_trans_alloc(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti);
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 4b7f7860e37e..cf99e0ca51b0 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -184,11 +184,18 @@ out_release_dquots:
}
/*
+ * Move sc->tempip from the regular directory tree to the metadata directory
+ * tree if sc->ip is part of the metadata directory tree and tempip has an
+ * eligible file mode.
+ *
* Temporary files have to be created before we even know which inode we're
* going to scrub, so we assume that they will be part of the regular directory
* tree. If it turns out that we're actually scrubbing a file from the
* metadata directory tree, we have to subtract the temp file from the root
- * dquots and detach the dquots.
+ * dquots and detach the dquots prior to setting the METADATA iflag. However,
+ * the scrub setup functions grab sc->ip and create sc->tempip before we
+ * actually get around to checking if the file mode is the right type for the
+ * scrubber.
*/
int
xrep_tempfile_adjust_directory_tree(
@@ -204,6 +211,9 @@ xrep_tempfile_adjust_directory_tree(
if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
return 0;
+ if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) &&
+ !S_ISREG(VFS_I(sc->tempip)->i_mode))
+ return 0;
xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
@@ -223,6 +233,7 @@ xrep_tempfile_adjust_directory_tree(
if (error)
goto out_ilock;
+ xfs_iflags_set(sc->tempip, XFS_IRECOVERY);
xfs_qm_dqdetach(sc->tempip);
out_ilock:
xrep_tempfile_iunlock(sc);
@@ -246,6 +257,8 @@ xrep_tempfile_remove_metadir(
ASSERT(sc->tp == NULL);
+ xfs_iflags_clear(sc->tempip, XFS_IRECOVERY);
+
xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
@@ -593,6 +606,8 @@ STATIC int
xrep_tempexch_prep_request(
struct xfs_scrub *sc,
int whichfork,
+ xfs_fileoff_t off,
+ xfs_filblks_t len,
struct xrep_tempexch *tx)
{
struct xfs_exchmaps_req *req = &tx->req;
@@ -616,18 +631,19 @@ xrep_tempexch_prep_request(
/* Exchange all mappings in both forks. */
req->ip1 = sc->tempip;
req->ip2 = sc->ip;
- req->startoff1 = 0;
- req->startoff2 = 0;
+ req->startoff1 = off;
+ req->startoff2 = off;
switch (whichfork) {
case XFS_ATTR_FORK:
req->flags |= XFS_EXCHMAPS_ATTR_FORK;
break;
case XFS_DATA_FORK:
- /* Always exchange sizes when exchanging data fork mappings. */
- req->flags |= XFS_EXCHMAPS_SET_SIZES;
+ /* Exchange sizes when exchanging all data fork mappings. */
+ if (off == 0 && len == XFS_MAX_FILEOFF)
+ req->flags |= XFS_EXCHMAPS_SET_SIZES;
break;
}
- req->blockcount = XFS_MAX_FILEOFF;
+ req->blockcount = len;
return 0;
}
@@ -736,6 +752,7 @@ xrep_tempexch_reserve_quota(
* or the two inodes have the same dquots.
*/
if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
+ xfs_is_metadir_inode(req->ip1) ||
(req->ip1->i_udquot == req->ip2->i_udquot &&
req->ip1->i_gdquot == req->ip2->i_gdquot &&
req->ip1->i_pdquot == req->ip2->i_pdquot))
@@ -782,6 +799,8 @@ int
xrep_tempexch_trans_reserve(
struct xfs_scrub *sc,
int whichfork,
+ xfs_fileoff_t off,
+ xfs_filblks_t len,
struct xrep_tempexch *tx)
{
int error;
@@ -790,7 +809,7 @@ xrep_tempexch_trans_reserve(
xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
- error = xrep_tempexch_prep_request(sc, whichfork, tx);
+ error = xrep_tempexch_prep_request(sc, whichfork, off, len, tx);
if (error)
return error;
@@ -828,7 +847,8 @@ xrep_tempexch_trans_alloc(
ASSERT(sc->tp == NULL);
ASSERT(xfs_has_exchange_range(sc->mp));
- error = xrep_tempexch_prep_request(sc, whichfork, tx);
+ error = xrep_tempexch_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF,
+ tx);
if (error)
return error;
@@ -945,10 +965,13 @@ xrep_is_tempfile(
/*
* Files in the metadata directory tree also have S_PRIVATE set and
- * IOP_XATTR unset, so we must distinguish them separately.
+ * IOP_XATTR unset, so we must distinguish them separately. We (ab)use
+ * the IRECOVERY flag to mark temporary metadir inodes knowing that the
+ * end of log recovery clears IRECOVERY, so the only ones that can
+ * exist during online repair are the ones we create.
*/
if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
- return false;
+ return __xfs_iflags_test(ip, XFS_IRECOVERY);
if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
return true;
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 98f923ae664d..2450e214103f 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -21,6 +21,7 @@
#include "xfs_rmap.h"
#include "xfs_parent.h"
#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 9b38f5ad1eaf..a8187281eb96 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -17,6 +17,7 @@
#include "xfs_bit.h"
#include "xfs_quota_defs.h"
+struct xfs_rtgroup;
struct xfs_scrub;
struct xfile;
struct xfarray;
@@ -40,6 +41,9 @@ struct xchk_dirtree_outcomes;
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+TRACE_DEFINE_ENUM(XG_TYPE_AG);
+TRACE_DEFINE_ENUM(XG_TYPE_RTG);
+
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PROBE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SB);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGF);
@@ -72,6 +76,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTRMAPBT);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTREFCBT);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -105,7 +111,9 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
{ XFS_SCRUB_TYPE_BARRIER, "barrier" }, \
{ XFS_SCRUB_TYPE_METAPATH, "metapath" }, \
- { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }
+ { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }, \
+ { XFS_SCRUB_TYPE_RTRMAPBT, "rtrmapbt" }, \
+ { XFS_SCRUB_TYPE_RTREFCBT, "rtrefcountbt" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -471,7 +479,7 @@ DECLARE_EVENT_CLASS(xchk_dqiter_class,
__field(xfs_exntst_t, state)
),
TP_fast_assign(
- __entry->dev = cursor->sc->ip->i_mount->m_super->s_dev;
+ __entry->dev = cursor->sc->mp->m_super->s_dev;
__entry->dqtype = cursor->dqtype;
__entry->ino = cursor->quota_ip->i_ino;
__entry->cur_id = cursor->id;
@@ -605,7 +613,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error,
TP_fast_assign(
xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level);
__entry->dev = sc->mp->m_super->s_dev;
- __entry->ino = sc->ip->i_ino;
+ __entry->ino = cur->bc_ino.ip->i_ino;
__entry->whichfork = cur->bc_ino.whichfork;
__entry->type = sc->sm->sm_type;
__assign_str(name);
@@ -1956,32 +1964,36 @@ DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup);
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
DECLARE_EVENT_CLASS(xrep_extent_class,
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
xfs_extlen_t len),
- TP_ARGS(pag, agbno, len),
+ TP_ARGS(xg, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len)
);
#define DEFINE_REPAIR_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_extent_class, name, \
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \
xfs_extlen_t len), \
- TP_ARGS(pag, agbno, len))
+ TP_ARGS(xg, agbno, len))
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
@@ -1989,35 +2001,39 @@ DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
DECLARE_EVENT_CLASS(xrep_reap_find_class,
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
xfs_extlen_t len, bool crosslinked),
- TP_ARGS(pag, agbno, len, crosslinked),
+ TP_ARGS(xg, agbno, len, crosslinked),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(bool, crosslinked)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
__entry->crosslinked = crosslinked;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x crosslinked %d",
+ TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x crosslinked %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len,
__entry->crosslinked ? 1 : 0)
);
#define DEFINE_REPAIR_REAP_FIND_EVENT(name) \
DEFINE_EVENT(xrep_reap_find_class, name, \
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \
xfs_extlen_t len, bool crosslinked), \
- TP_ARGS(pag, agbno, len, crosslinked))
+ TP_ARGS(xg, agbno, len, crosslinked))
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select);
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_bmapi_select);
@@ -2108,29 +2124,33 @@ TRACE_EVENT(xrep_ibt_found,
)
TRACE_EVENT(xrep_refc_found,
- TP_PROTO(const struct xfs_perag *pag,
+ TP_PROTO(const struct xfs_group *xg,
const struct xfs_refcount_irec *rec),
- TP_ARGS(pag, rec),
+ TP_ARGS(xg, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(enum xfs_refc_domain, domain)
+ __field(enum xfs_group_type, type)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
__field(xfs_nlink_t, refcount)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->agno = xg->xg_gno;
+ __entry->type = xg->xg_type;
__entry->domain = rec->rc_domain;
__entry->startblock = rec->rc_startblock;
__entry->blockcount = rec->rc_blockcount;
__entry->refcount = rec->rc_refcount;
),
- TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u",
+ TP_printk("dev %d:%d %sno 0x%x dom %s %sbno 0x%x fsbcount 0x%x refcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->startblock,
__entry->blockcount,
__entry->refcount)
@@ -2282,6 +2302,32 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize,
__entry->rmapbt_sz,
__entry->refcbt_sz)
)
+
+#ifdef CONFIG_XFS_RT
+TRACE_EVENT(xrep_calc_rtgroup_resblks_btsize,
+ TP_PROTO(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgblock_t usedlen, xfs_rgblock_t rmapbt_sz),
+ TP_ARGS(mp, rgno, usedlen, rmapbt_sz),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rgblock_t, usedlen)
+ __field(xfs_rgblock_t, rmapbt_sz)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rgno = rgno;
+ __entry->usedlen = usedlen;
+ __entry->rmapbt_sz = rmapbt_sz;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x usedlen %u rmapbt %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->usedlen,
+ __entry->rmapbt_sz)
+);
+#endif /* CONFIG_XFS_RT */
+
TRACE_EVENT(xrep_reset_counters,
TP_PROTO(struct xfs_mount *mp, struct xchk_fscounters *fsc),
TP_ARGS(mp, fsc),
@@ -2680,11 +2726,12 @@ DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode);
TRACE_EVENT(xrep_rmap_live_update,
- TP_PROTO(const struct xfs_perag *pag, unsigned int op,
+ TP_PROTO(const struct xfs_group *xg, unsigned int op,
const struct xfs_rmap_update_params *p),
- TP_ARGS(pag, op, p),
+ TP_ARGS(xg, op, p),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(unsigned int, op)
__field(xfs_agblock_t, agbno)
@@ -2694,8 +2741,9 @@ TRACE_EVENT(xrep_rmap_live_update,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->op = op;
__entry->agbno = p->startblock;
__entry->len = p->blockcount;
@@ -2704,10 +2752,12 @@ TRACE_EVENT(xrep_rmap_live_update,
if (p->unwritten)
__entry->flags |= XFS_RMAP_UNWRITTEN;
),
- TP_printk("dev %d:%d agno 0x%x op %d agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x op %d %sbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->op,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len,
__entry->owner,
@@ -2946,7 +2996,7 @@ DEFINE_EVENT(xrep_pptr_salvage_class, name, \
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_salvage_pptr);
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_insert_pptr);
-TRACE_EVENT(xrep_xattr_class,
+DECLARE_EVENT_CLASS(xrep_xattr_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
TP_ARGS(ip, arg_ip),
TP_STRUCT__entry(
@@ -3605,6 +3655,186 @@ DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink);
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink);
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link);
+#ifdef CONFIG_XFS_RT
+DECLARE_EVENT_CLASS(xrep_rtbitmap_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, xfs_rtxnum_t end),
+ TP_ARGS(mp, start, end),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rtxnum_t, start)
+ __field(xfs_rtxnum_t, end)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->start = start;
+ __entry->end = end;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d startrtx 0x%llx endrtx 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->start,
+ __entry->end)
+);
+#define DEFINE_REPAIR_RGBITMAP_EVENT(name) \
+DEFINE_EVENT(xrep_rtbitmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, \
+ xfs_rtxnum_t end), \
+ TP_ARGS(mp, start, end))
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free);
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free_bulk);
+
+TRACE_EVENT(xrep_rtbitmap_or,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ xfs_rtword_t mask, xfs_rtword_t word),
+ TP_ARGS(mp, wordoff, mask, word),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, mask)
+ __field(unsigned int, word)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->mask = mask;
+ __entry->word = word;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx mask 0x%x word 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->mask,
+ __entry->word)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load,
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_fileoff_t rbmoff,
+ xfs_rtxnum_t rtx, xfs_rtxnum_t len),
+ TP_ARGS(rtg, rbmoff, rtx, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(xfs_rtxnum_t, rtx)
+ __field(xfs_rtxnum_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rtdev = rtg_mount(rtg)->m_rtdev_targp->bt_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->rbmoff = rbmoff;
+ __entry->rtx = rtx;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rgno 0x%x rbmoff 0x%llx rtx 0x%llx rtxcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rgno,
+ __entry->rbmoff,
+ __entry->rtx,
+ __entry->len)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load_words,
+ TP_PROTO(struct xfs_mount *mp, xfs_fileoff_t rbmoff,
+ unsigned long long wordoff, unsigned int wordcnt),
+ TP_ARGS(mp, rbmoff, wordoff, wordcnt),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, wordcnt)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rbmoff = rbmoff;
+ __entry->wordoff = wordoff;
+ __entry->wordcnt = wordcnt;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rbmoff 0x%llx wordoff 0x%llx wordcnt 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rbmoff,
+ __entry->wordoff,
+ __entry->wordcnt)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load_word,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ unsigned int bit, xfs_rtword_t ondisk_word,
+ xfs_rtword_t xfile_word, xfs_rtword_t word_mask),
+ TP_ARGS(mp, wordoff, bit, ondisk_word, xfile_word, word_mask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, bit)
+ __field(xfs_rtword_t, ondisk_word)
+ __field(xfs_rtword_t, xfile_word)
+ __field(xfs_rtword_t, word_mask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->bit = bit;
+ __entry->ondisk_word = ondisk_word;
+ __entry->xfile_word = xfile_word;
+ __entry->word_mask = word_mask;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx bit %u ondisk 0x%x(0x%x) inmem 0x%x(0x%x) result 0x%x mask 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->bit,
+ __entry->ondisk_word,
+ __entry->ondisk_word & __entry->word_mask,
+ __entry->xfile_word,
+ __entry->xfile_word & ~__entry->word_mask,
+ (__entry->xfile_word & ~__entry->word_mask) |
+ (__entry->ondisk_word & __entry->word_mask),
+ __entry->word_mask)
+);
+
+TRACE_EVENT(xrep_rtrmap_found,
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_rmap_irec *rec),
+ TP_ARGS(mp, rec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rgblock_t, rgbno)
+ __field(xfs_extlen_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rgbno = rec->rm_startblock;
+ __entry->len = rec->rm_blockcount;
+ __entry->owner = rec->rm_owner;
+ __entry->offset = rec->rm_offset;
+ __entry->flags = rec->rm_flags;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rgbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rgbno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+#endif /* CONFIG_XFS_RT */
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */