summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/repair.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/scrub/repair.c')
-rw-r--r--fs/xfs/scrub/repair.c504
1 files changed, 488 insertions, 16 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1b8b5439f2d7..155bbaaa496e 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -27,12 +27,23 @@
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_defer.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
+#include "xfs_reflink.h"
+#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_dir2.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/stats.h"
+#include "scrub/xfile.h"
+#include "scrub/attr_repair.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -176,6 +187,16 @@ xrep_roll_ag_trans(
return 0;
}
+/* Roll the scrub transaction, holding the primary metadata locked. */
+int
+xrep_roll_trans(
+ struct xfs_scrub *sc)
+{
+ if (!sc->ip)
+ return xrep_roll_ag_trans(sc);
+ return xfs_trans_roll_inode(&sc->tp, sc->ip);
+}
+
/* Finish all deferred work attached to the repair transaction. */
int
xrep_defer_finish(
@@ -274,7 +295,7 @@ xrep_calc_ag_resblks(
icount = pag->pagi_count;
} else {
/* Try to get the actual counters from disk. */
- error = xfs_ialloc_read_agi(pag, NULL, &bp);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, &bp);
if (!error) {
icount = pag->pagi_count;
xfs_buf_relse(bp);
@@ -387,7 +408,7 @@ xrep_calc_ag_resblks(
int
xrep_fix_freelist(
struct xfs_scrub *sc,
- bool can_shrink)
+ int alloc_flags)
{
struct xfs_alloc_arg args = {0};
@@ -397,8 +418,7 @@ xrep_fix_freelist(
args.alignment = 1;
args.pag = sc->sa.pag;
- return xfs_alloc_fix_freelist(&args,
- can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
+ return xfs_alloc_fix_freelist(&args, alloc_flags);
}
/*
@@ -673,6 +693,45 @@ xrep_find_ag_btree_roots(
return error;
}
+#ifdef CONFIG_XFS_QUOTA
+/* Update some quota flags in the superblock. */
+void
+xrep_update_qflags(
+ struct xfs_scrub *sc,
+ unsigned int clear_flags,
+ unsigned int set_flags)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+
+ mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+ if ((mp->m_qflags & clear_flags) == 0 &&
+ (mp->m_qflags & set_flags) == set_flags)
+ goto no_update;
+
+ mp->m_qflags &= ~clear_flags;
+ mp->m_qflags |= set_flags;
+
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags &= ~clear_flags;
+ mp->m_sb.sb_qflags |= set_flags;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * Update the quota flags in the ondisk superblock without touching
+ * the summary counters. We have not quiesced inode chunk allocation,
+ * so we cannot coordinate with updates to the icount and ifree percpu
+ * counters.
+ */
+ bp = xfs_trans_getsb(sc->tp);
+ xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
+ xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1);
+
+no_update:
+ mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+}
+
/* Force a quotacheck the next time we mount. */
void
xrep_force_quotacheck(
@@ -685,13 +744,7 @@ xrep_force_quotacheck(
if (!(flag & sc->mp->m_qflags))
return;
- mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
- sc->mp->m_qflags &= ~flag;
- spin_lock(&sc->mp->m_sb_lock);
- sc->mp->m_sb.sb_qflags &= ~flag;
- spin_unlock(&sc->mp->m_sb_lock);
- xfs_log_sb(sc->tp);
- mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
+ xrep_update_qflags(sc, flag, 0);
}
/*
@@ -699,10 +752,10 @@ xrep_force_quotacheck(
*
* This function ensures that the appropriate dquots are attached to an inode.
* We cannot allow the dquot code to allocate an on-disk dquot block here
- * because we're already in transaction context with the inode locked. The
- * on-disk dquot should already exist anyway. If the quota code signals
- * corruption or missing quota information, schedule quotacheck, which will
- * repair corruptions in the quota metadata.
+ * because we're already in transaction context. The on-disk dquot should
+ * already exist anyway. If the quota code signals corruption or missing quota
+ * information, schedule quotacheck, which will repair corruptions in the quota
+ * metadata.
*/
int
xrep_ino_dqattach(
@@ -710,7 +763,10 @@ xrep_ino_dqattach(
{
int error;
- error = xfs_qm_dqattach_locked(sc->ip, false);
+ ASSERT(sc->tp != NULL);
+ ASSERT(sc->ip != NULL);
+
+ error = xfs_qm_dqattach(sc->ip);
switch (error) {
case -EFSBADCRC:
case -EFSCORRUPTED:
@@ -734,3 +790,419 @@ xrep_ino_dqattach(
return error;
}
+#endif /* CONFIG_XFS_QUOTA */
+
+/*
+ * Ensure that the inode being repaired is ready to handle a certain number of
+ * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode
+ * being repaired and have joined it to the scrub transaction.
+ */
+int
+xrep_ino_ensure_extent_count(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xfs_extnum_t nextents)
+{
+ xfs_extnum_t max_extents;
+ bool inode_has_nrext64;
+
+ inode_has_nrext64 = xfs_inode_has_large_extent_counts(sc->ip);
+ max_extents = xfs_iext_max_nextents(inode_has_nrext64, whichfork);
+ if (nextents <= max_extents)
+ return 0;
+ if (inode_has_nrext64)
+ return -EFSCORRUPTED;
+ if (!xfs_has_large_extent_counts(sc->mp))
+ return -EFSCORRUPTED;
+
+ max_extents = xfs_iext_max_nextents(true, whichfork);
+ if (nextents > max_extents)
+ return -EFSCORRUPTED;
+
+ sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ return 0;
+}
+
+/*
+ * Initialize all the btree cursors for an AG repair except for the btree that
+ * we're rebuilding.
+ */
+void
+xrep_ag_btcur_init(
+ struct xfs_scrub *sc,
+ struct xchk_ag *sa)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ /* Set up a bnobt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT &&
+ sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) {
+ sa->bno_cur = xfs_bnobt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+ sa->cnt_cur = xfs_cntbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+ }
+
+ /* Set up a inobt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT &&
+ sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) {
+ sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
+ sa->agi_bp);
+ if (xfs_has_finobt(mp))
+ sa->fino_cur = xfs_finobt_init_cursor(sc->sa.pag,
+ sc->tp, sa->agi_bp);
+ }
+
+ /* Set up a rmapbt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT &&
+ xfs_has_rmapbt(mp))
+ sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
+ sc->sa.pag);
+
+ /* Set up a refcountbt cursor for cross-referencing. */
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT &&
+ xfs_has_reflink(mp))
+ sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
+ sa->agf_bp, sc->sa.pag);
+}
+
+/*
+ * Reinitialize the in-core AG state after a repair by rereading the AGF
+ * buffer. We had better get the same AGF buffer as the one that's attached
+ * to the scrub context.
+ */
+int
+xrep_reinit_pagf(
+ struct xfs_scrub *sc)
+{
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_buf *bp;
+ int error;
+
+ ASSERT(pag);
+ ASSERT(xfs_perag_initialised_agf(pag));
+
+ clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
+ error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
+ if (error)
+ return error;
+
+ if (bp != sc->sa.agf_bp) {
+ ASSERT(bp == sc->sa.agf_bp);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/*
+ * Reinitialize the in-core AG state after a repair by rereading the AGI
+ * buffer. We had better get the same AGI buffer as the one that's attached
+ * to the scrub context.
+ */
+int
+xrep_reinit_pagi(
+ struct xfs_scrub *sc)
+{
+ struct xfs_perag *pag = sc->sa.pag;
+ struct xfs_buf *bp;
+ int error;
+
+ ASSERT(pag);
+ ASSERT(xfs_perag_initialised_agi(pag));
+
+ clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
+ error = xfs_ialloc_read_agi(pag, sc->tp, 0, &bp);
+ if (error)
+ return error;
+
+ if (bp != sc->sa.agi_bp) {
+ ASSERT(bp == sc->sa.agi_bp);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+/*
+ * Given an active reference to a perag structure, load AG headers and cursors.
+ * This should only be called to scan an AG while repairing file-based metadata.
+ */
+int
+xrep_ag_init(
+ struct xfs_scrub *sc,
+ struct xfs_perag *pag,
+ struct xchk_ag *sa)
+{
+ int error;
+
+ ASSERT(!sa->pag);
+
+ error = xfs_ialloc_read_agi(pag, sc->tp, 0, &sa->agi_bp);
+ if (error)
+ return error;
+
+ error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
+ if (error)
+ return error;
+
+ /* Grab our own passive reference from the caller's ref. */
+ sa->pag = xfs_perag_hold(pag);
+ xrep_ag_btcur_init(sc, sa);
+ return 0;
+}
+
+/* Reinitialize the per-AG block reservation for the AG we just fixed. */
+int
+xrep_reset_perag_resv(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!(sc->flags & XREP_RESET_PERAG_RESV))
+ return 0;
+
+ ASSERT(sc->sa.pag != NULL);
+ ASSERT(sc->ops->type == ST_PERAG);
+ ASSERT(sc->tp);
+
+ sc->flags &= ~XREP_RESET_PERAG_RESV;
+ xfs_ag_resv_free(sc->sa.pag);
+ error = xfs_ag_resv_init(sc->sa.pag, sc->tp);
+ if (error == -ENOSPC) {
+ xfs_err(sc->mp,
+"Insufficient free space to reset per-AG reservation for AG %u after repair.",
+ sc->sa.pag->pag_agno);
+ error = 0;
+ }
+
+ return error;
+}
+
+/* Decide if we are going to call the repair function for a scrub type. */
+bool
+xrep_will_attempt(
+ struct xfs_scrub *sc)
+{
+ /* Userspace asked us to rebuild the structure regardless. */
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
+ return true;
+
+ /* Let debug users force us into the repair routines. */
+ if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+ return true;
+
+ /* Metadata is corrupt or failed cross-referencing. */
+ if (xchk_needs_repair(sc->sm))
+ return true;
+
+ return false;
+}
+
+/* Try to fix some part of a metadata inode by calling another scrubber. */
+STATIC int
+xrep_metadata_inode_subtype(
+ struct xfs_scrub *sc,
+ unsigned int scrub_type)
+{
+ struct xfs_scrub_subord *sub;
+ int error;
+
+ /*
+ * Let's see if the inode needs repair. Use a subordinate scrub context
+ * to call the scrub and repair functions so that we can hang on to the
+ * resources that we already acquired instead of using the standard
+ * setup/teardown routines.
+ */
+ sub = xchk_scrub_create_subord(sc, scrub_type);
+ error = sub->sc.ops->scrub(&sub->sc);
+ if (error)
+ goto out;
+ if (!xrep_will_attempt(&sub->sc))
+ goto out;
+
+ /*
+ * Repair some part of the inode. This will potentially join the inode
+ * to the transaction.
+ */
+ error = sub->sc.ops->repair(&sub->sc);
+ if (error)
+ goto out;
+
+ /*
+ * Finish all deferred intent items and then roll the transaction so
+ * that the inode will not be joined to the transaction when we exit
+ * the function.
+ */
+ error = xfs_defer_finish(&sub->sc.tp);
+ if (error)
+ goto out;
+ error = xfs_trans_roll(&sub->sc.tp);
+ if (error)
+ goto out;
+
+ /*
+ * Clear the corruption flags and re-check the metadata that we just
+ * repaired.
+ */
+ sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ error = sub->sc.ops->scrub(&sub->sc);
+ if (error)
+ goto out;
+
+ /* If corruption persists, the repair has failed. */
+ if (xchk_needs_repair(sub->sc.sm)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+out:
+ xchk_scrub_free_subord(sub);
+ return error;
+}
+
+/*
+ * Repair the ondisk forks of a metadata inode. The caller must ensure that
+ * sc->ip points to the metadata inode and the ILOCK is held on that inode.
+ * The inode must not be joined to the transaction before the call, and will
+ * not be afterwards.
+ */
+int
+xrep_metadata_inode_forks(
+ struct xfs_scrub *sc)
+{
+ bool dirty = false;
+ int error;
+
+ /* Repair the inode record and the data fork. */
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
+ if (error)
+ return error;
+
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
+ if (error)
+ return error;
+
+ /* Make sure the attr fork looks ok before we delete it. */
+ if (xfs_inode_hasattr(sc->ip)) {
+ error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
+ if (error)
+ return error;
+ }
+
+ /* Clear the reflink flag since metadata never shares. */
+ if (xfs_is_reflink_inode(sc->ip)) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
+ if (error)
+ return error;
+ }
+
+ /* Clear the attr forks since metadata shouldn't have that. */
+ if (xfs_inode_hasattr(sc->ip)) {
+ if (!dirty) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ }
+ error = xrep_xattr_reset_fork(sc);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If we modified the inode, roll the transaction but don't rejoin the
+ * inode to the new transaction because xrep_bmap_data can do that.
+ */
+ if (dirty) {
+ error = xfs_trans_roll(&sc->tp);
+ if (error)
+ return error;
+ dirty = false;
+ }
+
+ return 0;
+}
+
+/*
+ * Set up an in-memory buffer cache so that we can use the xfbtree. Allocating
+ * a shmem file might take loks, so we cannot be in transaction context. Park
+ * our resources in the scrub context and let the teardown function take care
+ * of them at the right time.
+ */
+int
+xrep_setup_xfbtree(
+ struct xfs_scrub *sc,
+ const char *descr)
+{
+ ASSERT(sc->tp == NULL);
+
+ return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
+}
+
+/*
+ * Create a dummy transaction for use in a live update hook function. This
+ * function MUST NOT be called from regular repair code because the current
+ * process' transaction is saved via the cookie.
+ */
+int
+xrep_trans_alloc_hook_dummy(
+ struct xfs_mount *mp,
+ void **cookiep,
+ struct xfs_trans **tpp)
+{
+ int error;
+
+ *cookiep = current->journal_info;
+ current->journal_info = NULL;
+
+ error = xfs_trans_alloc_empty(mp, tpp);
+ if (!error)
+ return 0;
+
+ current->journal_info = *cookiep;
+ *cookiep = NULL;
+ return error;
+}
+
+/* Cancel a dummy transaction used by a live update hook function. */
+void
+xrep_trans_cancel_hook_dummy(
+ void **cookiep,
+ struct xfs_trans *tp)
+{
+ xfs_trans_cancel(tp);
+ current->journal_info = *cookiep;
+ *cookiep = NULL;
+}
+
+/*
+ * See if this buffer can pass the given ->verify_struct() function.
+ *
+ * If the buffer already has ops attached and they're not the ones that were
+ * passed in, we reject the buffer. Otherwise, we perform the structure test
+ * (note that we do not check CRCs) and return the outcome of the test. The
+ * buffer ops and error state are left unchanged.
+ */
+bool
+xrep_buf_verify_struct(
+ struct xfs_buf *bp,
+ const struct xfs_buf_ops *ops)
+{
+ const struct xfs_buf_ops *old_ops = bp->b_ops;
+ xfs_failaddr_t fa;
+ int old_error;
+
+ if (old_ops) {
+ if (old_ops != ops)
+ return false;
+ }
+
+ old_error = bp->b_error;
+ bp->b_ops = ops;
+ fa = bp->b_ops->verify_struct(bp);
+ bp->b_ops = old_ops;
+ bp->b_error = old_error;
+
+ return fa == NULL;
+}