diff options
Diffstat (limited to 'fs/xfs/scrub/repair.c')
-rw-r--r-- | fs/xfs/scrub/repair.c | 504 |
1 files changed, 488 insertions, 16 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 1b8b5439f2d7..155bbaaa496e 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -27,12 +27,23 @@ #include "xfs_quota.h" #include "xfs_qm.h" #include "xfs_defer.h" +#include "xfs_errortag.h" +#include "xfs_error.h" +#include "xfs_reflink.h" +#include "xfs_health.h" +#include "xfs_buf_mem.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_dir2.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/bitmap.h" #include "scrub/stats.h" +#include "scrub/xfile.h" +#include "scrub/attr_repair.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -176,6 +187,16 @@ xrep_roll_ag_trans( return 0; } +/* Roll the scrub transaction, holding the primary metadata locked. */ +int +xrep_roll_trans( + struct xfs_scrub *sc) +{ + if (!sc->ip) + return xrep_roll_ag_trans(sc); + return xfs_trans_roll_inode(&sc->tp, sc->ip); +} + /* Finish all deferred work attached to the repair transaction. */ int xrep_defer_finish( @@ -274,7 +295,7 @@ xrep_calc_ag_resblks( icount = pag->pagi_count; } else { /* Try to get the actual counters from disk. */ - error = xfs_ialloc_read_agi(pag, NULL, &bp); + error = xfs_ialloc_read_agi(pag, NULL, 0, &bp); if (!error) { icount = pag->pagi_count; xfs_buf_relse(bp); @@ -387,7 +408,7 @@ xrep_calc_ag_resblks( int xrep_fix_freelist( struct xfs_scrub *sc, - bool can_shrink) + int alloc_flags) { struct xfs_alloc_arg args = {0}; @@ -397,8 +418,7 @@ xrep_fix_freelist( args.alignment = 1; args.pag = sc->sa.pag; - return xfs_alloc_fix_freelist(&args, - can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK); + return xfs_alloc_fix_freelist(&args, alloc_flags); } /* @@ -673,6 +693,45 @@ xrep_find_ag_btree_roots( return error; } +#ifdef CONFIG_XFS_QUOTA +/* Update some quota flags in the superblock. */ +void +xrep_update_qflags( + struct xfs_scrub *sc, + unsigned int clear_flags, + unsigned int set_flags) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + + mutex_lock(&mp->m_quotainfo->qi_quotaofflock); + if ((mp->m_qflags & clear_flags) == 0 && + (mp->m_qflags & set_flags) == set_flags) + goto no_update; + + mp->m_qflags &= ~clear_flags; + mp->m_qflags |= set_flags; + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags &= ~clear_flags; + mp->m_sb.sb_qflags |= set_flags; + spin_unlock(&mp->m_sb_lock); + + /* + * Update the quota flags in the ondisk superblock without touching + * the summary counters. We have not quiesced inode chunk allocation, + * so we cannot coordinate with updates to the icount and ifree percpu + * counters. + */ + bp = xfs_trans_getsb(sc->tp); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); + xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF); + xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1); + +no_update: + mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); +} + /* Force a quotacheck the next time we mount. */ void xrep_force_quotacheck( @@ -685,13 +744,7 @@ xrep_force_quotacheck( if (!(flag & sc->mp->m_qflags)) return; - mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock); - sc->mp->m_qflags &= ~flag; - spin_lock(&sc->mp->m_sb_lock); - sc->mp->m_sb.sb_qflags &= ~flag; - spin_unlock(&sc->mp->m_sb_lock); - xfs_log_sb(sc->tp); - mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock); + xrep_update_qflags(sc, flag, 0); } /* @@ -699,10 +752,10 @@ xrep_force_quotacheck( * * This function ensures that the appropriate dquots are attached to an inode. * We cannot allow the dquot code to allocate an on-disk dquot block here - * because we're already in transaction context with the inode locked. The - * on-disk dquot should already exist anyway. If the quota code signals - * corruption or missing quota information, schedule quotacheck, which will - * repair corruptions in the quota metadata. + * because we're already in transaction context. The on-disk dquot should + * already exist anyway. If the quota code signals corruption or missing quota + * information, schedule quotacheck, which will repair corruptions in the quota + * metadata. */ int xrep_ino_dqattach( @@ -710,7 +763,10 @@ xrep_ino_dqattach( { int error; - error = xfs_qm_dqattach_locked(sc->ip, false); + ASSERT(sc->tp != NULL); + ASSERT(sc->ip != NULL); + + error = xfs_qm_dqattach(sc->ip); switch (error) { case -EFSBADCRC: case -EFSCORRUPTED: @@ -734,3 +790,419 @@ xrep_ino_dqattach( return error; } +#endif /* CONFIG_XFS_QUOTA */ + +/* + * Ensure that the inode being repaired is ready to handle a certain number of + * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode + * being repaired and have joined it to the scrub transaction. + */ +int +xrep_ino_ensure_extent_count( + struct xfs_scrub *sc, + int whichfork, + xfs_extnum_t nextents) +{ + xfs_extnum_t max_extents; + bool inode_has_nrext64; + + inode_has_nrext64 = xfs_inode_has_large_extent_counts(sc->ip); + max_extents = xfs_iext_max_nextents(inode_has_nrext64, whichfork); + if (nextents <= max_extents) + return 0; + if (inode_has_nrext64) + return -EFSCORRUPTED; + if (!xfs_has_large_extent_counts(sc->mp)) + return -EFSCORRUPTED; + + max_extents = xfs_iext_max_nextents(true, whichfork); + if (nextents > max_extents) + return -EFSCORRUPTED; + + sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64; + xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); + return 0; +} + +/* + * Initialize all the btree cursors for an AG repair except for the btree that + * we're rebuilding. + */ +void +xrep_ag_btcur_init( + struct xfs_scrub *sc, + struct xchk_ag *sa) +{ + struct xfs_mount *mp = sc->mp; + + /* Set up a bnobt cursor for cross-referencing. */ + if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT && + sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) { + sa->bno_cur = xfs_bnobt_init_cursor(mp, sc->tp, sa->agf_bp, + sc->sa.pag); + sa->cnt_cur = xfs_cntbt_init_cursor(mp, sc->tp, sa->agf_bp, + sc->sa.pag); + } + + /* Set up a inobt cursor for cross-referencing. */ + if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT && + sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) { + sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, + sa->agi_bp); + if (xfs_has_finobt(mp)) + sa->fino_cur = xfs_finobt_init_cursor(sc->sa.pag, + sc->tp, sa->agi_bp); + } + + /* Set up a rmapbt cursor for cross-referencing. */ + if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT && + xfs_has_rmapbt(mp)) + sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, + sc->sa.pag); + + /* Set up a refcountbt cursor for cross-referencing. */ + if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT && + xfs_has_reflink(mp)) + sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, + sa->agf_bp, sc->sa.pag); +} + +/* + * Reinitialize the in-core AG state after a repair by rereading the AGF + * buffer. We had better get the same AGF buffer as the one that's attached + * to the scrub context. + */ +int +xrep_reinit_pagf( + struct xfs_scrub *sc) +{ + struct xfs_perag *pag = sc->sa.pag; + struct xfs_buf *bp; + int error; + + ASSERT(pag); + ASSERT(xfs_perag_initialised_agf(pag)); + + clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); + error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp); + if (error) + return error; + + if (bp != sc->sa.agf_bp) { + ASSERT(bp == sc->sa.agf_bp); + return -EFSCORRUPTED; + } + + return 0; +} + +/* + * Reinitialize the in-core AG state after a repair by rereading the AGI + * buffer. We had better get the same AGI buffer as the one that's attached + * to the scrub context. + */ +int +xrep_reinit_pagi( + struct xfs_scrub *sc) +{ + struct xfs_perag *pag = sc->sa.pag; + struct xfs_buf *bp; + int error; + + ASSERT(pag); + ASSERT(xfs_perag_initialised_agi(pag)); + + clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); + error = xfs_ialloc_read_agi(pag, sc->tp, 0, &bp); + if (error) + return error; + + if (bp != sc->sa.agi_bp) { + ASSERT(bp == sc->sa.agi_bp); + return -EFSCORRUPTED; + } + + return 0; +} + +/* + * Given an active reference to a perag structure, load AG headers and cursors. + * This should only be called to scan an AG while repairing file-based metadata. + */ +int +xrep_ag_init( + struct xfs_scrub *sc, + struct xfs_perag *pag, + struct xchk_ag *sa) +{ + int error; + + ASSERT(!sa->pag); + + error = xfs_ialloc_read_agi(pag, sc->tp, 0, &sa->agi_bp); + if (error) + return error; + + error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp); + if (error) + return error; + + /* Grab our own passive reference from the caller's ref. */ + sa->pag = xfs_perag_hold(pag); + xrep_ag_btcur_init(sc, sa); + return 0; +} + +/* Reinitialize the per-AG block reservation for the AG we just fixed. */ +int +xrep_reset_perag_resv( + struct xfs_scrub *sc) +{ + int error; + + if (!(sc->flags & XREP_RESET_PERAG_RESV)) + return 0; + + ASSERT(sc->sa.pag != NULL); + ASSERT(sc->ops->type == ST_PERAG); + ASSERT(sc->tp); + + sc->flags &= ~XREP_RESET_PERAG_RESV; + xfs_ag_resv_free(sc->sa.pag); + error = xfs_ag_resv_init(sc->sa.pag, sc->tp); + if (error == -ENOSPC) { + xfs_err(sc->mp, +"Insufficient free space to reset per-AG reservation for AG %u after repair.", + sc->sa.pag->pag_agno); + error = 0; + } + + return error; +} + +/* Decide if we are going to call the repair function for a scrub type. */ +bool +xrep_will_attempt( + struct xfs_scrub *sc) +{ + /* Userspace asked us to rebuild the structure regardless. */ + if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) + return true; + + /* Let debug users force us into the repair routines. */ + if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) + return true; + + /* Metadata is corrupt or failed cross-referencing. */ + if (xchk_needs_repair(sc->sm)) + return true; + + return false; +} + +/* Try to fix some part of a metadata inode by calling another scrubber. */ +STATIC int +xrep_metadata_inode_subtype( + struct xfs_scrub *sc, + unsigned int scrub_type) +{ + struct xfs_scrub_subord *sub; + int error; + + /* + * Let's see if the inode needs repair. Use a subordinate scrub context + * to call the scrub and repair functions so that we can hang on to the + * resources that we already acquired instead of using the standard + * setup/teardown routines. + */ + sub = xchk_scrub_create_subord(sc, scrub_type); + error = sub->sc.ops->scrub(&sub->sc); + if (error) + goto out; + if (!xrep_will_attempt(&sub->sc)) + goto out; + + /* + * Repair some part of the inode. This will potentially join the inode + * to the transaction. + */ + error = sub->sc.ops->repair(&sub->sc); + if (error) + goto out; + + /* + * Finish all deferred intent items and then roll the transaction so + * that the inode will not be joined to the transaction when we exit + * the function. + */ + error = xfs_defer_finish(&sub->sc.tp); + if (error) + goto out; + error = xfs_trans_roll(&sub->sc.tp); + if (error) + goto out; + + /* + * Clear the corruption flags and re-check the metadata that we just + * repaired. + */ + sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; + error = sub->sc.ops->scrub(&sub->sc); + if (error) + goto out; + + /* If corruption persists, the repair has failed. */ + if (xchk_needs_repair(sub->sc.sm)) { + error = -EFSCORRUPTED; + goto out; + } +out: + xchk_scrub_free_subord(sub); + return error; +} + +/* + * Repair the ondisk forks of a metadata inode. The caller must ensure that + * sc->ip points to the metadata inode and the ILOCK is held on that inode. + * The inode must not be joined to the transaction before the call, and will + * not be afterwards. + */ +int +xrep_metadata_inode_forks( + struct xfs_scrub *sc) +{ + bool dirty = false; + int error; + + /* Repair the inode record and the data fork. */ + error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE); + if (error) + return error; + + error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); + if (error) + return error; + + /* Make sure the attr fork looks ok before we delete it. */ + if (xfs_inode_hasattr(sc->ip)) { + error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); + if (error) + return error; + } + + /* Clear the reflink flag since metadata never shares. */ + if (xfs_is_reflink_inode(sc->ip)) { + dirty = true; + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); + if (error) + return error; + } + + /* Clear the attr forks since metadata shouldn't have that. */ + if (xfs_inode_hasattr(sc->ip)) { + if (!dirty) { + dirty = true; + xfs_trans_ijoin(sc->tp, sc->ip, 0); + } + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + } + + /* + * If we modified the inode, roll the transaction but don't rejoin the + * inode to the new transaction because xrep_bmap_data can do that. + */ + if (dirty) { + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + dirty = false; + } + + return 0; +} + +/* + * Set up an in-memory buffer cache so that we can use the xfbtree. Allocating + * a shmem file might take loks, so we cannot be in transaction context. Park + * our resources in the scrub context and let the teardown function take care + * of them at the right time. + */ +int +xrep_setup_xfbtree( + struct xfs_scrub *sc, + const char *descr) +{ + ASSERT(sc->tp == NULL); + + return xmbuf_alloc(sc->mp, descr, &sc->xmbtp); +} + +/* + * Create a dummy transaction for use in a live update hook function. This + * function MUST NOT be called from regular repair code because the current + * process' transaction is saved via the cookie. + */ +int +xrep_trans_alloc_hook_dummy( + struct xfs_mount *mp, + void **cookiep, + struct xfs_trans **tpp) +{ + int error; + + *cookiep = current->journal_info; + current->journal_info = NULL; + + error = xfs_trans_alloc_empty(mp, tpp); + if (!error) + return 0; + + current->journal_info = *cookiep; + *cookiep = NULL; + return error; +} + +/* Cancel a dummy transaction used by a live update hook function. */ +void +xrep_trans_cancel_hook_dummy( + void **cookiep, + struct xfs_trans *tp) +{ + xfs_trans_cancel(tp); + current->journal_info = *cookiep; + *cookiep = NULL; +} + +/* + * See if this buffer can pass the given ->verify_struct() function. + * + * If the buffer already has ops attached and they're not the ones that were + * passed in, we reject the buffer. Otherwise, we perform the structure test + * (note that we do not check CRCs) and return the outcome of the test. The + * buffer ops and error state are left unchanged. + */ +bool +xrep_buf_verify_struct( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + const struct xfs_buf_ops *old_ops = bp->b_ops; + xfs_failaddr_t fa; + int old_error; + + if (old_ops) { + if (old_ops != ops) + return false; + } + + old_error = bp->b_error; + bp->b_ops = ops; + fa = bp->b_ops->verify_struct(bp); + bp->b_ops = old_ops; + bp->b_error = old_error; + + return fa == NULL; +} |