Diffstat (limited to 'fs/xfs/libxfs')
47 files changed, 1886 insertions, 984 deletions
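The hunks below repeatedly apply one conversion: buffer verifiers stop returning bool and instead return an xfs_failaddr_t — __this_address at the first failing check, NULL on success — and callers forward that address to xfs_verifier_error() together with -EFSBADCRC or -EFSCORRUPTED; most xfs_buf_ops also gain a .verify_struct hook pointing at the structural verifier. The following is a minimal sketch of the converted shape only, not part of this patch; "xfs_foo" and the CRC offset are hypothetical names, while xfs_failaddr_t, __this_address, xfs_verifier_error() and .verify_struct are the interfaces actually used in the hunks that follow.

	/* Illustrative sketch, not applied by this series. */
	static xfs_failaddr_t
	xfs_foo_verify(
		struct xfs_buf		*bp)
	{
		struct xfs_mount	*mp = bp->b_target->bt_mount;

		/* each failed check reports the address of the failing test */
		if (!xfs_sb_version_hascrc(&mp->m_sb))
			return __this_address;
		/* ... further structural checks on bp->b_addr ... */
		return NULL;		/* NULL means the buffer verified cleanly */
	}

	static void
	xfs_foo_read_verify(
		struct xfs_buf		*bp)
	{
		xfs_failaddr_t		fa;

		/* XFS_FOO_CRC_OFF is a placeholder for the real CRC offset */
		if (!xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
			xfs_verifier_error(bp, -EFSBADCRC, __this_address);
		else {
			fa = xfs_foo_verify(bp);
			if (fa)
				xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		}
	}

Write verifiers follow the same shape without the CRC check, and the structural verifier itself is wired up as .verify_struct so scrub code can reuse it, which is the pattern every per-buffer-type change below implements.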
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..c02781a4c091 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -167,7 +167,7 @@ xfs_alloc_lookup_ge( * Lookup the first record less than or equal to [bno, len] * in the btree given by cur. */ -static int /* error */ +int /* error */ xfs_alloc_lookup_le( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -520,7 +520,7 @@ xfs_alloc_fixup_trees( return 0; } -static bool +static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { @@ -528,10 +528,19 @@ xfs_agfl_verify( struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; + /* + * There is no verification of non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return NULL; + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -539,16 +548,17 @@ xfs_agfl_verify( * so we can detect and avoid this problem. */ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) - return false; + return __this_address; } - return xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); + if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) + return __this_address; + return NULL; } static void @@ -556,6 +566,7 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -567,28 +578,29 @@ xfs_agfl_read_verify( return; if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agfl_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc AGFLs */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_agfl_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agfl_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -602,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { .name = "xfs_agfl", .verify_read = xfs_agfl_read_verify, .verify_write = xfs_agfl_write_verify, + .verify_struct = xfs_agfl_verify, }; /* @@ -702,7 +715,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if 
(!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1682,7 +1695,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; @@ -2397,19 +2410,19 @@ xfs_alloc_put_freelist( return 0; } -static bool +static xfs_failaddr_t xfs_agf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) - { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) - return false; + return __this_address; } if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && @@ -2418,18 +2431,18 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) - return false; + return __this_address; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasrmapbt(&mp->m_sb) && (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2438,18 +2451,18 @@ xfs_agf_verify( * so we can detect and avoid this problem. 
*/ if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; if (xfs_sb_version_haslazysbcount(&mp->m_sb) && be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return false; + return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; - return true;; + return NULL; } @@ -2458,28 +2471,29 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, - XFS_ERRTAG_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agf_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agf_verify(mp, bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2496,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = { .name = "xfs_agf", .verify_read = xfs_agf_read_verify, .verify_write = xfs_agf_write_verify, + .verify_struct = xfs_agf_verify, }; /* @@ -2981,3 +2996,22 @@ xfs_verify_fsbno( return false; return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } + +/* Is there a record covering a given extent? 
*/ +int +xfs_alloc_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.a.ar_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.a.ar_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7ba2d129d504..65a0cafe06e4 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -198,6 +198,13 @@ xfs_free_extent( enum xfs_ag_resv_type type); /* block reservation type */ int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +int /* error */ xfs_alloc_lookup_ge( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ @@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exist); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index cfde0a0f9706..6840b588187e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys( be32_to_cpu(k2->alloc.ar_startblock); } -static bool +static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -331,29 +332,31 @@ xfs_allocbt_verify( level = be16_to_cpu(block->bb_level); switch (block->bb_magic) { case cpu_to_be32(XFS_ABTB_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTB_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_BNOi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; case cpu_to_be32(XFS_ABTC_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_ABTC_MAGIC): if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_CNTi]) - return false; + return __this_address; } else if (level >= mp->m_ag_maxlevels) - return false; + return __this_address; break; default: - return false; + return __this_address; } return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); @@ -363,25 +366,30 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_allocbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_allocbt_write_verify( struct xfs_buf *bp) { - if (!xfs_allocbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = 
xfs_allocbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -392,6 +400,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = { .name = "xfs_allocbt", .verify_read = xfs_allocbt_read_verify, .verify_write = xfs_allocbt_write_verify, + .verify_struct = xfs_allocbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..ce4a34a2751d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier. + */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -707,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -760,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) return 0; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; return error; } @@ -1035,7 +1043,6 @@ out: return retval; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } @@ -1176,7 +1183,6 @@ out: return error; out_defer_cancel: xfs_defer_cancel(args->dfops); - args->trans = NULL; goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..2135b8e67dcc 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk( } } -static bool +static xfs_failaddr_t xfs_attr3_leaf_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_attr_leafblock *leaf = bp->b_addr; - struct xfs_perag *pag = bp->b_pag; - struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_perag *pag = bp->b_pag; + struct xfs_attr_leaf_entry *entries; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); @@ -262,17 +263,17 @@ xfs_attr3_leaf_verify( struct 
xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) - return false; + return __this_address; } /* * In recovery there is a transient state where count == 0 is valid @@ -280,12 +281,27 @@ xfs_attr3_leaf_verify( * if the attr didn't fit in shortform. */ if (pag && pag->pagf_init && ichdr.count == 0) - return false; + return __this_address; + + /* + * firstused is the block offset of the first name info structure. + * Make sure it doesn't go off the block or crash into the header. + */ + if (ichdr.firstused > mp->m_attr_geo->blksize) + return __this_address; + if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf)) + return __this_address; + + /* Make sure the entries array doesn't crash into the name info. */ + entries = xfs_attr3_leaf_entryp(bp->b_addr); + if ((char *)&entries[ichdr.count] > + (char *)bp->b_addr + ichdr.firstused) + return __this_address; /* XXX: need to range check rest of attr header values */ /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_attr3_leaf_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_leaf_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_attr3_leaf_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { .name = "xfs_attr3_leaf", .verify_read = xfs_attr3_leaf_read_verify, .verify_write = xfs_attr3_leaf_write_verify, + .verify_struct = xfs_attr3_leaf_verify, }; int @@ -735,10 +754,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. */ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -818,7 +840,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; @@ -867,6 +889,80 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } +/* Verify the consistency of an inline attribute fork. 
*/ +xfs_failaddr_t +xfs_attr_shortform_verify( + struct xfs_inode *ip) +{ + struct xfs_attr_shortform *sfp; + struct xfs_attr_sf_entry *sfep; + struct xfs_attr_sf_entry *next_sfep; + char *endp; + struct xfs_ifork *ifp; + int i; + int size; + + ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; + size = ifp->if_bytes; + + /* + * Give up if the attribute is way too short. + */ + if (size < sizeof(struct xfs_attr_sf_hdr)) + return __this_address; + + endp = (char *)sfp + size; + + /* Check all reported entries */ + sfep = &sfp->list[0]; + for (i = 0; i < sfp->hdr.count; i++) { + /* + * struct xfs_attr_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + if (((char *)sfep + sizeof(*sfep)) >= endp) + return __this_address; + + /* Don't allow names with known bad length. */ + if (sfep->namelen == 0) + return __this_address; + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep); + if ((char *)next_sfep > endp) + return __this_address; + + /* + * Check for unknown flags. Short form doesn't support + * the incomplete or local bits, so we can use the namespace + * mask here. + */ + if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK) + return __this_address; + + /* + * Check for invalid namespace combinations. We only allow + * one namespace flag per xattr, so we can just count the + * bits (i.e. hweight) here. + */ + if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return __this_address; + + sfep = next_sfep; + } + if ((void *)sfep != (void *)endp) + return __this_address; + + return NULL; +} + /* * Convert a leaf attribute list to shortform attribute list */ @@ -2170,7 +2266,8 @@ xfs_attr3_leaf_lookup_int( leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); - ASSERT(ichdr.count < args->geo->blksize / 8); + if (ichdr.count >= args->geo->blksize / 8) + return -EFSCORRUPTED; /* * Binary search. 
(note: small blocks will skip this loop) @@ -2186,8 +2283,10 @@ xfs_attr3_leaf_lookup_int( else break; } - ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count)); - ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval); + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) + return -EFSCORRUPTED; + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) + return -EFSCORRUPTED; /* * Since we may have duplicate hashval's, find the first matching diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..4da08af5b134 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,10 +48,12 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d56caf037ca0..21be186067a2 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks( * does CRC, location and bounds checking, the unpacking function checks the * attribute parameters and owner. */ -static bool +static xfs_failaddr_t xfs_attr3_rmt_hdr_ok( void *ptr, xfs_ino_t ino, @@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok( struct xfs_attr3_rmt_hdr *rmt = ptr; if (bno != be64_to_cpu(rmt->rm_blkno)) - return false; + return __this_address; if (offset != be32_to_cpu(rmt->rm_offset)) - return false; + return __this_address; if (size != be32_to_cpu(rmt->rm_bytes)) - return false; + return __this_address; if (ino != be64_to_cpu(rmt->rm_owner)) - return false; + return __this_address; /* ok */ - return true; + return NULL; } -static bool +static xfs_failaddr_t xfs_attr3_rmt_verify( struct xfs_mount *mp, void *ptr, @@ -98,27 +98,29 @@ xfs_attr3_rmt_verify( struct xfs_attr3_rmt_hdr *rmt = ptr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(rmt->rm_blkno) != bno) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) - return false; + return __this_address; if (be32_to_cpu(rmt->rm_offset) + be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) - return false; + return __this_address; if (rmt->rm_owner == 0) - return false; + return __this_address; - return true; + return NULL; } -static void -xfs_attr3_rmt_read_verify( - struct xfs_buf *bp) +static int +__xfs_attr3_rmt_read_verify( + struct xfs_buf *bp, + bool check_crc, + xfs_failaddr_t *failaddr) { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; @@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify( /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; + return 0; ptr = bp->b_addr; bno = bp->b_bn; @@ -136,23 +138,48 
@@ xfs_attr3_rmt_read_verify( ASSERT(len >= blksize); while (len > 0) { - if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); - break; - } - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; + if (check_crc && + !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { + *failaddr = __this_address; + return -EFSBADCRC; } + *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (*failaddr) + return -EFSCORRUPTED; len -= blksize; ptr += blksize; bno += BTOBB(blksize); } - if (bp->b_error) - xfs_verifier_error(bp); - else - ASSERT(len == 0); + if (len != 0) { + *failaddr = __this_address; + return -EFSCORRUPTED; + } + + return 0; +} + +static void +xfs_attr3_rmt_read_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, true, &fa); + if (error) + xfs_verifier_error(bp, error, fa); +} + +static xfs_failaddr_t +xfs_attr3_rmt_verify_struct( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error; + + error = __xfs_attr3_rmt_read_verify(bp, false, &fa); + return error ? fa : NULL; } static void @@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; int len; @@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify( while (len > 0) { struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; - if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify( * xfs_attr3_rmt_hdr_set() for the explanation. 
*/ if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); return; } xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); @@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify( ptr += blksize; bno += BTOBB(blksize); } - ASSERT(len == 0); + + if (len != 0) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { .name = "xfs_attr3_rmt", .verify_read = xfs_attr3_rmt_read_verify, .verify_write = xfs_attr3_rmt_write_verify, + .verify_struct = xfs_attr3_rmt_verify_struct, }; STATIC int @@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout( byte_cnt = min(*valuelen, byte_cnt); if (xfs_sb_version_hascrc(&mp->m_sb)) { - if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset, + if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, byte_cnt, bno)) { xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e2315..daae00ed30c5 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), error0); + xfs_verify_fsbno(mp, bno), error0); if (bp_release) { bp_release = 0; xfs_trans_brelse(NULL, bp); @@ -1220,7 +1220,7 @@ xfs_iread_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(mp, - XFS_FSB_SANITY_CHECK(mp, bno), out_brelse); + xfs_verify_fsbno(mp, bno), out_brelse); xfs_trans_brelse(tp, bp); } @@ -3337,6 +3337,49 @@ xfs_bmap_btalloc_filestreams( return 0; } +/* Update all inode and quota accounting for the allocation we just did. */ +static void +xfs_bmap_btalloc_accounting( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) +{ + if (ap->flags & XFS_BMAPI_COWFORK) { + /* + * COW fork blocks are in-core only and thus are treated as + * in-core quota reservation (like delalloc blocks) even when + * converted to real blocks. The quota reservation is not + * accounted to disk until blocks are remapped to the data + * fork. So if these blocks were previously delalloc, we + * already have quota reservation and there's nothing to do + * yet. + */ + if (ap->wasdel) + return; + + /* + * Otherwise, we've allocated blocks in a hole. The transaction + * has acquired in-core quota reservation for this extent. + * Rather than account these as real blocks, however, we reduce + * the transaction quota reservation based on the allocation. + * This essentially transfers the transaction quota reservation + * to that of a delalloc extent. + */ + ap->ip->i_delayed_blks += args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, + -(long)args->len); + return; + } + + /* data/attr fork only */ + ap->ip->i_d.di_nblocks += args->len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args->len; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? 
XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, + args->len); +} + STATIC int xfs_bmap_btalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -3347,6 +3390,8 @@ xfs_bmap_btalloc( xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; + xfs_fileoff_t orig_offset; + xfs_extlen_t orig_length; xfs_extlen_t blen; xfs_extlen_t nextminlen = 0; int nullfb; /* true if ap->firstblock isn't set */ @@ -3356,6 +3401,8 @@ xfs_bmap_btalloc( int stripe_align; ASSERT(ap->length); + orig_offset = ap->offset; + orig_length = ap->length; mp = ap->ip->i_mount; @@ -3571,19 +3618,23 @@ xfs_bmap_btalloc( *ap->firstblock = args.fsbno; ASSERT(nullfb || fb_agno <= args.agno); ap->length = args.len; - if (!(ap->flags & XFS_BMAPI_COWFORK)) - ap->ip->i_d.di_nblocks += args.len; - xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); - if (ap->wasdel) - ap->ip->i_delayed_blks -= args.len; /* - * Adjust the disk quota also. This was reserved - * earlier. + * If the extent size hint is active, we tried to round the + * caller's allocation request offset down to extsz and the + * length up to another extsz boundary. If we found a free + * extent we mapped it in starting at this new offset. If the + * newly mapped space isn't long enough to cover any of the + * range of offsets that was originally requested, move the + * mapping up so that we can fill as much of the caller's + * original request as possible. Free space is apparently + * very fragmented so we're unlikely to be able to satisfy the + * hints anyway. */ - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : - XFS_TRANS_DQ_BCOUNT, - (long) args.len); + if (ap->length <= orig_length) + ap->offset = orig_offset; + else if (ap->offset + ap->length < orig_offset + orig_length) + ap->offset = orig_offset + orig_length - ap->length; + xfs_bmap_btalloc_accounting(ap, &args); } else { ap->blkno = NULLFSBLOCK; ap->length = 0; @@ -3876,8 +3927,6 @@ xfs_bmapi_reserve_delalloc( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_extlen_t alen; xfs_extlen_t indlen; - char rt = XFS_IS_REALTIME_INODE(ip); - xfs_extlen_t extsz; int error; xfs_fileoff_t aoff = off; @@ -3892,31 +3941,25 @@ xfs_bmapi_reserve_delalloc( prealloc = alen - len; /* Figure out the extent size, adjust alen */ - if (whichfork == XFS_COW_FORK) - extsz = xfs_get_cowextsz_hint(ip); - else - extsz = xfs_get_extsz_hint(ip); - if (extsz) { + if (whichfork == XFS_COW_FORK) { struct xfs_bmbt_irec prev; + xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) prev.br_startoff = NULLFILEOFF; - error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof, + error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 1, 0, &aoff, &alen); ASSERT(!error); } - if (rt) - extsz = alen / mp->m_sb.sb_rextsize; - /* * Make a transaction-less quota reservation for delayed allocation * blocks. This number gets adjusted later. We return if we haven't * allocated blocks already inside this loop. */ error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, - rt ? 
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + XFS_QMOPT_RES_REGBLKS); if (error) return error; @@ -3927,12 +3970,7 @@ xfs_bmapi_reserve_delalloc( indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); ASSERT(indlen > 0); - if (rt) { - error = xfs_mod_frextents(mp, -((int64_t)extsz)); - } else { - error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); - } - + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); if (error) goto out_unreserve_quota; @@ -3963,14 +4001,11 @@ xfs_bmapi_reserve_delalloc( return 0; out_unreserve_blocks: - if (rt) - xfs_mod_frextents(mp, extsz); - else - xfs_mod_fdblocks(mp, alen, false); + xfs_mod_fdblocks(mp, alen, false); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) - xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, + XFS_QMOPT_RES_REGBLKS); return error; } @@ -4304,8 +4339,16 @@ xfs_bmapi_write( while (bno < end && n < *nmap) { bool need_alloc = false, wasdelay = false; - /* in hole or beyoned EOF? */ + /* in hole or beyond EOF? */ if (eof || bma.got.br_startoff > bno) { + /* + * CoW fork conversions should /never/ hit EOF or + * holes. There should always be something for us + * to work on. + */ + ASSERT(!((flags & XFS_BMAPI_CONVERT) && + (flags & XFS_BMAPI_COWFORK))); + if (flags & XFS_BMAPI_DELALLOC) { /* * For the COW fork we can reasonably get a @@ -4824,6 +4867,7 @@ xfs_bmap_del_extent_cow( xfs_iext_insert(ip, icur, &new, state); break; } + ip->i_delayed_blks -= del->br_blockcount; } /* @@ -5136,7 +5180,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; @@ -5662,7 +5706,8 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5812,8 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); if (stop_fsb >= got.br_startoff + got.br_blockcount) { error = -EIO; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index c10aecaaae44..9faf479aba49 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -425,33 +425,29 @@ xfs_bmbt_diff_two_keys( be64_to_cpu(k2->bmbt.br_startoff); } -static bool +static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; switch (block->bb_magic) { case cpu_to_be32(XFS_BMAP_CRC_MAGIC): - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; - if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn) - return false; /* * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. 
*/ - if (be64_to_cpu(block->bb_u.l.bb_owner) == 0) - return false; + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_BMAP_MAGIC): break; default: - return false; + return __this_address; } /* @@ -463,46 +459,39 @@ xfs_bmbt_verify( */ level = be16_to_cpu(block->bb_level); if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) - return false; - if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return false; - - /* sibling pointer verification */ - if (!block->bb_u.l.bb_leftsib || - (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) - return false; - if (!block->bb_u.l.bb_rightsib || - (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && - !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) - return false; - - return true; + return __this_address; + + return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } static void xfs_bmbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_lblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_bmbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_bmbt_write_verify( struct xfs_buf *bp) { - if (!xfs_bmbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_bmbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_lblock_calc_crc(bp); @@ -512,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = { .name = "xfs_bmbt", .verify_read = xfs_bmbt_read_verify, .verify_write = xfs_bmbt_write_verify, + .verify_struct = xfs_bmbt_verify, }; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5f33adf8eecb..79ee4a1951d1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) return; @@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) - return false; + return __this_address; return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } @@ -853,7 +853,7 @@ xfs_btree_read_bufl( xfs_daddr_t d; /* real disk block address */ int error; - if (!XFS_FSB_SANITY_CHECK(mp, fsbno)) + if (!xfs_verify_fsbno(mp, fsbno)) return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, @@ -4529,6 +4529,51 @@ xfs_btree_change_owner( &bbcoi); } +/* Verify the v5 fields of a long-format btree block. 
*/ +xfs_failaddr_t +xfs_btree_lblock_v5hdr_verify( + struct xfs_buf *bp, + uint64_t owner) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) + return __this_address; + if (owner != XFS_RMAP_OWN_UNKNOWN && + be64_to_cpu(block->bb_u.l.bb_owner) != owner) + return __this_address; + return NULL; +} + +/* Verify a long-format btree block. */ +xfs_failaddr_t +xfs_btree_lblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) + return __this_address; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && + !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) + return __this_address; + + return NULL; +} + /** * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format * btree block @@ -4537,7 +4582,7 @@ xfs_btree_change_owner( * @max_recs: pointer to the m_*_mxr max records field in the xfs mount * @pag_max_level: pointer to the per-ag max level field */ -bool +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { @@ -4546,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify( struct xfs_perag *pag = bp->b_pag; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) - return false; + return __this_address; if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) - return false; - return true; + return __this_address; + return NULL; } /** @@ -4562,29 +4607,29 @@ xfs_btree_sblock_v5hdr_verify( * @bp: buffer containing the btree block * @max_recs: maximum records allowed in this btree node */ -bool +xfs_failaddr_t xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_agblock_t agno; /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) - return false; + return __this_address; /* sibling pointer verification */ - if (!block->bb_u.s.bb_leftsib || - (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK))) - return false; - if (!block->bb_u.s.bb_rightsib || - (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks && - block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK))) - return false; + agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); + if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) + return __this_address; + if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && + !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) + return __this_address; - return true; + return NULL; } /* @@ -4953,3 +4998,33 @@ xfs_btree_diff_two_ptrs( return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); return (int64_t)be32_to_cpu(a->s) - 
be32_to_cpu(b->s); } + +/* If there's an extent, we're done. */ +STATIC int +xfs_btree_has_record_helper( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + return XFS_BTREE_QUERY_RANGE_ABORT; +} + +/* Is there a record covering a given range of keys? */ +int +xfs_btree_has_record( + struct xfs_btree_cur *cur, + union xfs_btree_irec *low, + union xfs_btree_irec *high, + bool *exists) +{ + int error; + + error = xfs_btree_query_range(cur, low, high, + &xfs_btree_has_record_helper, NULL); + if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + *exists = true; + return 0; + } + *exists = false; + return error; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index b57501c6f71d..50440b5618e8 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -#define XFS_FSB_SANITY_CHECK(mp,fsb) \ - (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) - /* * Trace hooks. Currently not implemented as they need to be ported * over to the generic tracing functionality, which is some effort. @@ -496,8 +492,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) #define XFS_BTREE_TRACE_ARGR(c, r) #define XFS_BTREE_TRACE_CURSOR(c, t) -bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); -bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); +xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); +xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, + unsigned int max_recs); +xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, + uint64_t owner); +xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, + unsigned int max_recs); + uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, unsigned long len); xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, @@ -545,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur, struct xfs_btree_block *block, union xfs_btree_key *key); union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, union xfs_btree_key *key); +int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, + union xfs_btree_irec *high, bool *exists); #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 651611530d2f..ea187b4a7991 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } -static bool +static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { @@ -145,24 +145,24 @@ xfs_da3_node_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (ichdr.magic != XFS_DA3_NODE_MAGIC) - return false; + return __this_address; if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn))) - return false; + return __this_address; } else { if (ichdr.magic != XFS_DA_NODE_MAGIC) - return false; + return __this_address; } if (ichdr.level == 0) - return false; + return __this_address; if (ichdr.level > XFS_DA_NODE_MAXDEPTH) - return false; + return __this_address; if (ichdr.count == 0) - 
return false; + return __this_address; /* * we don't know if the node is for and attribute or directory tree, @@ -170,11 +170,11 @@ xfs_da3_node_verify( */ if (ichdr.count > mp->m_dir_geo->node_ents && ichdr.count > mp->m_attr_geo->node_ents) - return false; + return __this_address; /* XXX: hash order check? */ - return true; + return NULL; } static void @@ -182,12 +182,13 @@ xfs_da3_node_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_da3_node_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -211,19 +212,20 @@ xfs_da3_node_read_verify( struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; + xfs_failaddr_t fa; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { - xfs_buf_ioerror(bp, -EFSBADCRC); + xfs_verifier_error(bp, -EFSBADCRC, + __this_address); break; } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - break; - } + fa = xfs_da3_node_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -236,18 +238,40 @@ xfs_da3_node_read_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } +} + +/* Verify the structure of a da3 block. */ +static xfs_failaddr_t +xfs_da3_node_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_da_blkinfo *info = bp->b_addr; - /* corrupt block */ - xfs_verifier_error(bp); + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: + case XFS_DA_NODE_MAGIC: + return xfs_da3_node_verify(bp); + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: + bp->b_ops = &xfs_attr3_leaf_buf_ops; + return bp->b_ops->verify_struct(bp); + case XFS_DIR2_LEAFN_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: + bp->b_ops = &xfs_dir3_leafn_buf_ops; + return bp->b_ops->verify_struct(bp); + default: + return __this_address; + } } const struct xfs_buf_ops xfs_da3_node_buf_ops = { .name = "xfs_da3_node", .verify_read = xfs_da3_node_read_verify, .verify_write = xfs_da3_node_write_verify, + .verify_struct = xfs_da3_node_verify_struct, }; int diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 3771edcb301d..7e77299b7789 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_attr3_rmt_hdr) : 0)) +/* Number of bytes in a directory block. */ +static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp) +{ + return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog); +} + #endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. 
*/ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. + */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. 
*/ struct xfs_defer_op_type { diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index e10778c102ea..92f94e190f04 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -119,8 +119,7 @@ xfs_da_mount( ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); - ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= - XFS_MAX_BLOCKSIZE); + ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); @@ -140,7 +139,7 @@ xfs_da_mount( dageo = mp->m_dir_geo; dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; dageo->fsblog = mp->m_sb.sb_blocklog; - dageo->blksize = 1 << dageo->blklog; + dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb); dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; /* diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 1a8f2cf977ca..388d67c5c903 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) #define XFS_READDIR_BUFSIZE (32768) unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype); +void *xfs_dir3_data_endp(struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr); #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 43c902f7a68d..2da86a394bcf 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -58,7 +58,7 @@ xfs_dir_startup(void) xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); } -static bool +static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { @@ -67,20 +67,18 @@ xfs_dir3_block_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } static void @@ -88,15 +86,16 @@ xfs_dir3_block_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_block_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -104,12 +103,13 @@ xfs_dir3_block_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_block_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_block_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .name = "xfs_dir3_block", .verify_read = 
xfs_dir3_block_read_verify, .verify_write = xfs_dir3_block_write_verify, + .verify_struct = xfs_dir3_block_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 8727a43115ef..920279485275 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -36,9 +36,9 @@ /* * Check the consistency of the data block. * The input can also be a block-format directory. - * Return 0 is the buffer is good, otherwise an error. + * Return NULL if the buffer is good, otherwise the address of the error. */ -int +xfs_failaddr_t __xfs_dir3_data_check( struct xfs_inode *dp, /* incore inode pointer */ struct xfs_buf *bp) /* data block's buffer */ @@ -73,6 +73,14 @@ __xfs_dir3_data_check( */ ops = xfs_dir_get_ops(mp, dp); + /* + * If this isn't a directory, or we don't get handed the dir ops, + * something is seriously wrong. Bail out. + */ + if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) || + ops != xfs_dir_get_ops(mp, NULL)) + return __this_address; + hdr = bp->b_addr; p = (char *)ops->data_entry_p(hdr); @@ -81,7 +89,6 @@ __xfs_dir3_data_check( case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): btp = xfs_dir2_block_tail_p(geo, hdr); lep = xfs_dir2_block_leaf_p(btp); - endp = (char *)lep; /* * The number of leaf entries is limited by the size of the @@ -90,17 +97,19 @@ __xfs_dir3_data_check( * so just ensure that the count falls somewhere inside the * block right now. */ - XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); + if (be32_to_cpu(btp->count) >= + ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)) + return __this_address; break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): - endp = (char *)hdr + geo->blksize; break; default: - XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); - return -EFSCORRUPTED; + return __this_address; } + endp = xfs_dir3_data_endp(geo, hdr); + if (!endp) + return __this_address; /* * Account for zero bestfree entries. @@ -108,22 +117,25 @@ __xfs_dir3_data_check( bf = ops->data_bestfree_p(hdr); count = lastfree = freeseen = 0; if (!bf[0].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset); + if (bf[0].offset) + return __this_address; freeseen |= 1 << 0; } if (!bf[1].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset); + if (bf[1].offset) + return __this_address; freeseen |= 1 << 1; } if (!bf[2].length) { - XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset); + if (bf[2].offset) + return __this_address; freeseen |= 1 << 2; } - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >= - be16_to_cpu(bf[1].length)); - XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length)) + return __this_address; + if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length)) + return __this_address; /* * Loop over the data/unused entries. */ @@ -135,22 +147,23 @@ __xfs_dir3_data_check( * doesn't need to be there. 
*/ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + be16_to_cpu(dup->length)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == - (char *)dup - (char *)hdr); + if (lastfree != 0) + return __this_address; + if (endp < p + be16_to_cpu(dup->length)) + return __this_address; + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + (char *)dup - (char *)hdr) + return __this_address; dfp = xfs_dir2_data_freefind(hdr, bf, dup); if (dfp) { i = (int)(dfp - bf); - XFS_WANT_CORRUPTED_RETURN(mp, - (freeseen & (1 << i)) == 0); + if ((freeseen & (1 << i)) != 0) + return __this_address; freeseen |= 1 << i; } else { - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(dup->length) <= - be16_to_cpu(bf[2].length)); + if (be16_to_cpu(dup->length) > + be16_to_cpu(bf[2].length)) + return __this_address; } p += be16_to_cpu(dup->length); lastfree = 1; @@ -163,16 +176,17 @@ __xfs_dir3_data_check( * The linear search is crude but this is DEBUG code. */ dep = (xfs_dir2_data_entry_t *)p; - XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); - XFS_WANT_CORRUPTED_RETURN(mp, - !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= - p + ops->data_entsize(dep->namelen)); - XFS_WANT_CORRUPTED_RETURN(mp, - be16_to_cpu(*ops->data_entry_tag_p(dep)) == - (char *)dep - (char *)hdr); - XFS_WANT_CORRUPTED_RETURN(mp, - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + if (dep->namelen == 0) + return __this_address; + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) + return __this_address; + if (endp < p + ops->data_entsize(dep->namelen)) + return __this_address; + if (be16_to_cpu(*ops->data_entry_tag_p(dep)) != + (char *)dep - (char *)hdr) + return __this_address; + if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX) + return __this_address; count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -188,34 +202,52 @@ __xfs_dir3_data_check( be32_to_cpu(lep[i].hashval) == hash) break; } - XFS_WANT_CORRUPTED_RETURN(mp, - i < be32_to_cpu(btp->count)); + if (i >= be32_to_cpu(btp->count)) + return __this_address; } p += ops->data_entsize(dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. 
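Inside __xfs_dir3_data_check the three bestfree slots are policed with a small bitmask: an empty slot must have a zero offset and counts as already seen, slot lengths must be non-increasing, each free region found while walking the block must either claim one not-yet-seen slot or be no longer than the smallest slot, and at the end freeseen must equal 7. A standalone sketch of that accounting follows; the types are toys and the slot matching is simplified to a length comparison, where the kernel matches by offset via xfs_dir2_data_freefind().

#include <stdbool.h>
#include <stdint.h>

struct bestfree {
	uint16_t	offset;
	uint16_t	length;
};

/* Validate the three best-free slots while walking a data block's regions. */
static bool check_bestfree(const struct bestfree bf[3],
			   const uint16_t *free_lens, int nfree)
{
	unsigned int freeseen = 0;
	int i, j;

	/* An empty slot must be wholly zero and counts as already seen. */
	for (i = 0; i < 3; i++) {
		if (!bf[i].length) {
			if (bf[i].offset)
				return false;
			freeseen |= 1U << i;
		}
	}

	/* The slots are kept sorted by length, largest first. */
	if (bf[0].length < bf[1].length || bf[1].length < bf[2].length)
		return false;

	/* Each free region claims one unseen slot or is no longer than bf[2]. */
	for (j = 0; j < nfree; j++) {
		for (i = 0; i < 3; i++)
			if (!(freeseen & (1U << i)) &&
			    bf[i].length == free_lens[j])
				break;
		if (i < 3)
			freeseen |= 1U << i;
		else if (free_lens[j] > bf[2].length)
			return false;
	}

	/* Every slot must have been accounted for by the end of the walk. */
	return freeseen == 7;
}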
*/ - XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7); + if (freeseen != 7) + return __this_address; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { if (lep[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; - if (i > 0) - XFS_WANT_CORRUPTED_RETURN(mp, - be32_to_cpu(lep[i].hashval) >= - be32_to_cpu(lep[i - 1].hashval)); + if (i > 0 && be32_to_cpu(lep[i].hashval) < + be32_to_cpu(lep[i - 1].hashval)) + return __this_address; } - XFS_WANT_CORRUPTED_RETURN(mp, count == - be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); - XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale)); + if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)) + return __this_address; + if (stale != be32_to_cpu(btp->stale)) + return __this_address; } - return 0; + return NULL; +} + +#ifdef DEBUG +void +xfs_dir3_data_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = __xfs_dir3_data_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); } +#endif -static bool +static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { @@ -224,20 +256,18 @@ xfs_dir3_data_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) - return false; + return __this_address; } - if (__xfs_dir3_data_check(NULL, bp)) - return false; - return true; + return __xfs_dir3_data_check(NULL, bp); } /* @@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify( bp->b_ops->verify_read(bp); return; default: - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); break; } } @@ -274,15 +303,16 @@ xfs_dir3_data_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_data_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -290,12 +320,13 @@ xfs_dir3_data_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_data_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_data_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = { .name = "xfs_dir3_data", .verify_read = xfs_dir3_data_read_verify, .verify_write = xfs_dir3_data_write_verify, + .verify_struct = xfs_dir3_data_verify, }; static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { 
@@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int( struct xfs_dir2_data_hdr *hdr, int *loghead) { - xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ xfs_dir2_data_unused_t *dup; /* unused data entry */ struct xfs_dir2_data_free *bf; @@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int( * Set up pointers. */ p = (char *)ops->data_entry_p(hdr); - if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(geo, hdr); - endp = (char *)xfs_dir2_block_leaf_p(btp); - } else - endp = (char *)hdr + geo->blksize; + endp = xfs_dir3_data_endp(geo, hdr); /* * Loop over the block's entries. */ @@ -755,17 +781,9 @@ xfs_dir2_data_make_free( /* * Figure out where the end of the data area is. */ - if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) - endptr = (char *)hdr + args->geo->blksize; - else { - xfs_dir2_block_tail_t *btp; /* block tail */ + endptr = xfs_dir3_data_endp(args->geo, hdr); + ASSERT(endptr != NULL); - ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || - hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); - btp = xfs_dir2_block_tail_p(args->geo, hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); - } /* * If this isn't the start of the block, then back up to * the previous entry and see if it's free. @@ -1067,3 +1085,21 @@ xfs_dir2_data_use_free( } *needscanp = needscan; } + +/* Find the end of the entry data in a data/block format dir block. */ +void * +xfs_dir3_data_endp( + struct xfs_da_geometry *geo, + struct xfs_dir2_data_hdr *hdr) +{ + switch (hdr->magic) { + case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): + case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): + return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr)); + case cpu_to_be32(XFS_DIR3_DATA_MAGIC): + case cpu_to_be32(XFS_DIR2_DATA_MAGIC): + return (char *)hdr + geo->blksize; + default: + return NULL; + } +} diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 27297a689d9c..d7e630f41f9c 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -STATIC bool +static xfs_failaddr_t xfs_dir3_leaf1_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -69,17 +63,32 @@ xfs_dir3_leaf1_check( if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leaf1_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -bool +xfs_failaddr_t xfs_dir3_leaf_check_int( struct xfs_mount *mp, struct xfs_inode *dp, @@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int( * We can deduce a value for that from di_size. */ if (hdr->count > ops->leaf_max_ents(geo)) - return false; + return __this_address; /* Leaves and bests don't overlap in leaf format. 
*/ if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || hdr->magic == XFS_DIR3_LEAF1_MAGIC) && (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) - return false; + return __this_address; /* Check hash value order, count stale entries. */ for (i = stale = 0; i < hdr->count; i++) { if (i + 1 < hdr->count) { if (be32_to_cpu(ents[i].hashval) > be32_to_cpu(ents[i + 1].hashval)) - return false; + return __this_address; } if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; } if (hdr->stale != stale) - return false; - return true; + return __this_address; + return NULL; } /* @@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int( * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due * to incorrect magic numbers. */ -static bool +static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp, uint16_t magic) @@ -160,16 +169,16 @@ xfs_dir3_leaf_verify( : XFS_DIR3_LEAFN_MAGIC; if (leaf3->info.hdr.magic != cpu_to_be16(magic3)) - return false; + return __this_address; if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn))) - return false; + return __this_address; } else { if (leaf->hdr.info.magic != cpu_to_be16(magic)) - return false; + return __this_address; } return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); @@ -181,15 +190,16 @@ __read_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -198,12 +208,13 @@ __write_verify( uint16_t magic) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_leaf_verify(bp, magic)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_leaf_verify(bp, magic); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -216,6 +227,13 @@ __write_verify( xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } +static xfs_failaddr_t +xfs_dir3_leaf1_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC); +} + static void xfs_dir3_leaf1_read_verify( struct xfs_buf *bp) @@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify( __write_verify(bp, XFS_DIR2_LEAF1_MAGIC); } +static xfs_failaddr_t +xfs_dir3_leafn_verify( + struct xfs_buf *bp) +{ + return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC); +} + static void xfs_dir3_leafn_read_verify( struct xfs_buf *bp) @@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { .name = "xfs_dir3_leaf1", .verify_read = xfs_dir3_leaf1_read_verify, .verify_write = xfs_dir3_leaf1_write_verify, + .verify_struct = xfs_dir3_leaf1_verify, }; const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { .name = "xfs_dir3_leafn", .verify_read = xfs_dir3_leafn_read_verify, .verify_write = xfs_dir3_leafn_write_verify, + .verify_struct = xfs_dir3_leafn_verify, }; int diff --git a/fs/xfs/libxfs/xfs_dir2_node.c 
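xfs_dir3_leaf_check_int reduces to three invariants once the header is decoded: the entry count fits the geometry, hash values never decrease, and the number of entries whose address is XFS_DIR2_NULL_DATAPTR equals the stale count in the header. The same walk in plain C, with a toy entry type and a stand-in for the null data pointer:

#include <stdbool.h>
#include <stdint.h>

#define NULL_DATAPTR	0xffffffffu	/* stands in for XFS_DIR2_NULL_DATAPTR */

struct leaf_ent {
	uint32_t	hashval;
	uint32_t	address;
};

/* Check hash ordering and the stale-entry count of a leaf block. */
static bool leaf_check(const struct leaf_ent *ents, unsigned int count,
		       unsigned int max_ents, unsigned int hdr_stale)
{
	unsigned int i, stale = 0;

	if (count > max_ents)
		return false;

	for (i = 0; i < count; i++) {
		if (i + 1 < count && ents[i].hashval > ents[i + 1].hashval)
			return false;
		if (ents[i].address == NULL_DATAPTR)
			stale++;
	}
	return stale == hdr_stale;
}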
b/fs/xfs/libxfs/xfs_dir2_node.c index 682e2bf370c7..239d97a64296 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ -do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ - ASSERT(0); \ -} while (0); - -static bool +static xfs_failaddr_t xfs_dir3_leafn_check( struct xfs_inode *dp, struct xfs_buf *bp) @@ -72,17 +66,32 @@ xfs_dir3_leafn_check( if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) - return false; + return __this_address; } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) - return false; + return __this_address; return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); } + +static inline void +xfs_dir3_leaf_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + + fa = xfs_dir3_leafn_check(dp, bp); + if (!fa) + return; + xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount, + bp->b_addr, __FILE__, __LINE__, fa); + ASSERT(0); +} #else #define xfs_dir3_leaf_check(dp, bp) #endif -static bool +static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { @@ -93,21 +102,21 @@ xfs_dir3_free_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (be64_to_cpu(hdr3->blkno) != bp->b_bn) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn))) - return false; + return __this_address; } else { if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) - return false; + return __this_address; } /* XXX: should bounds check the xfs_dir3_icfree_hdr here */ - return true; + return NULL; } static void @@ -115,15 +124,16 @@ xfs_dir3_free_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dir3_free_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -131,12 +141,13 @@ xfs_dir3_free_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + xfs_failaddr_t fa; - if (!xfs_dir3_free_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_dir3_free_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = { .name = "xfs_dir3_free", .verify_read = xfs_dir3_free_read_verify, .verify_write = xfs_dir3_free_write_verify, + .verify_struct = xfs_dir3_free_verify, }; /* Everything ok in the free block header? 
*/ -static bool +static xfs_failaddr_t xfs_dir3_free_header_check( struct xfs_inode *dp, xfs_dablk_t fbno, @@ -174,22 +186,22 @@ xfs_dir3_free_header_check( struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; if (be32_to_cpu(hdr3->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) - return false; + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (be32_to_cpu(hdr->firstdb) != firstdb) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) > maxbests) - return false; + return __this_address; if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused)) - return false; + return __this_address; } - return true; + return NULL; } static int @@ -200,6 +212,7 @@ __xfs_dir3_free_read( xfs_daddr_t mappedbno, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, @@ -208,9 +221,9 @@ __xfs_dir3_free_read( return err; /* Check things that we can't do in the verifier. */ - if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) { - xfs_buf_ioerror(*bpp, -EFSCORRUPTED); - xfs_verifier_error(*bpp); + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); + if (fa) { + xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); xfs_trans_brelse(tp, *bpp); return -EFSCORRUPTED; } @@ -1906,7 +1919,7 @@ xfs_dir2_node_addname_int( (unsigned long long)ifbno, lastfbno); if (fblk) { xfs_alert(mp, - " fblk 0x%p blkno %llu index %d magic 0x%x", + " fblk "PTR_FMT" blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 4badd26c47e6..753aeeeffc18 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, /* xfs_dir2_data.c */ #ifdef DEBUG -#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp); +extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); #else #define xfs_dir3_data_check(dp,bp) #endif -extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp); +extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp, + struct xfs_buf *bp); extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, @@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, - struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); +extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp, + struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, + struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, @@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_inode *ip); +extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct 
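xfs_dir3_free_header_check, shown above, holds the checks that cannot live in the buffer verifier because they need the inode and the block's position in the directory: firstdb must match the value computed from that position, nvalid cannot exceed the per-block capacity, and nused cannot exceed nvalid. A rough standalone equivalent follows; the firstdb arithmetic is an approximation of the kernel's xfs_dir2_da_to_db() based calculation, and the names are toys.

#include <stdbool.h>
#include <stdint.h>

struct free_hdr {
	uint32_t	firstdb;
	uint32_t	nvalid;
	uint32_t	nused;
};

/*
 * fbno is the directory-relative block number of this free block,
 * freeblk_base the first block of the free segment, and maxbests how
 * many "best free" entries fit in one block.
 */
static bool free_header_check(const struct free_hdr *hdr, uint32_t fbno,
			      uint32_t freeblk_base, uint32_t maxbests)
{
	uint32_t firstdb = (fbno - freeblk_base) * maxbests;

	if (hdr->firstdb != firstdb)
		return false;
	if (hdr->nvalid > maxbests)
		return false;
	if (hdr->nvalid < hdr->nused)
		return false;
	return true;
}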
xfs_trans *tp, struct xfs_inode *dp, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index be8b9755f66a..0c75a7f00883 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -156,7 +156,6 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ { xfs_dir2_data_hdr_t *hdr; /* block header */ - xfs_dir2_block_tail_t *btp; /* block tail pointer */ xfs_dir2_data_entry_t *dep; /* data entry pointer */ xfs_inode_t *dp; /* incore directory inode */ xfs_dir2_data_unused_t *dup; /* unused data pointer */ @@ -192,9 +191,8 @@ xfs_dir2_block_to_sf( /* * Set up to loop over the block's entries. */ - btp = xfs_dir2_block_tail_p(args->geo, hdr); ptr = (char *)dp->d_ops->data_entry_p(hdr); - endptr = (char *)xfs_dir2_block_leaf_p(btp); + endptr = xfs_dir3_data_endp(args->geo, hdr); sfep = xfs_dir2_sf_firstentry(sfp); /* * Loop over the active and unused entries. @@ -630,7 +628,7 @@ xfs_dir2_sf_check( #endif /* DEBUG */ /* Verify the consistency of an inline directory. */ -int +xfs_failaddr_t xfs_dir2_sf_verify( struct xfs_inode *ip) { @@ -665,7 +663,7 @@ xfs_dir2_sf_verify( */ if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; + return __this_address; endp = (char *)sfp + size; @@ -674,7 +672,7 @@ xfs_dir2_sf_verify( i8count = ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; offset = dops->data_first_offset; /* Check all reported entries */ @@ -686,11 +684,11 @@ xfs_dir2_sf_verify( * within the data buffer. */ if (((char *)sfep + sizeof(*sfep)) >= endp) - return -EFSCORRUPTED; + return __this_address; /* Don't allow names with known bad length. */ if (sfep->namelen == 0) - return -EFSCORRUPTED; + return __this_address; /* * Check that the variable-length part of the structure is @@ -699,23 +697,23 @@ xfs_dir2_sf_verify( */ next_sfep = dops->sf_nextentry(sfp, sfep); if (endp < (char *)next_sfep) - return -EFSCORRUPTED; + return __this_address; /* Check that the offsets always increase. */ if (xfs_dir2_sf_get_offset(sfep) < offset) - return -EFSCORRUPTED; + return __this_address; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; error = xfs_dir_ino_validate(mp, ino); if (error) - return error; + return __this_address; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); if (filetype >= XFS_DIR3_FT_MAX) - return -EFSCORRUPTED; + return __this_address; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); @@ -723,16 +721,16 @@ xfs_dir2_sf_verify( sfep = next_sfep; } if (i8count != sfp->i8count) - return -EFSCORRUPTED; + return __this_address; if ((void *)sfep != (void *)endp) - return -EFSCORRUPTED; + return __this_address; /* Make sure this whole thing ought to be in local format. */ if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) - return -EFSCORRUPTED; + return __this_address; - return 0; + return NULL; } /* diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 747085b4ef44..8b7a6c3cb599 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk( /* * Do some primitive error checking on ondisk dquot data structures. 
*/ -int -xfs_dqcheck( +xfs_failaddr_t +xfs_dquot_verify( struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ - uint flags, - const char *str) + uint flags) { - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - /* * We can encounter an uninitialized dquot buffer for 2 reasons: * 1. If we crash while deleting the quotainode(s), and those blks got @@ -69,87 +65,57 @@ xfs_dqcheck( * This is all fine; things are still consistent, and we haven't lost * any quota information. Just don't complain about bad dquot blks. */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) + return __this_address; + if (ddq->d_version != XFS_DQUOT_VERSION) + return __this_address; if (ddq->d_flags != XFS_DQ_USER && ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } + ddq->d_flags != XFS_DQ_GROUP) + return __this_address; - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } + if (id != -1 && id != be32_to_cpu(ddq->d_id)) + return __this_address; - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } + if (!ddq->d_id) + return NULL; + + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) && + !ddq->d_btimer) + return __this_address; + + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) && + !ddq->d_itimer) + return __this_address; - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) && + !ddq->d_rtbtimer) + return __this_address; + + return NULL; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. 
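Beyond the magic, version, flags and id checks, xfs_dquot_verify only rejects a dquot when a soft limit has been exceeded but the matching grace-period timer was never started, and it skips the limit checks entirely for the id-0 dquot that carries the default limits. A minimal model of that rule, using host-endian toy fields instead of the big-endian on-disk ones:

#include <stdbool.h>
#include <stdint.h>

struct toy_dquot {
	uint32_t	id;
	uint64_t	bcount, blk_softlimit;
	uint64_t	icount, ino_softlimit;
	uint64_t	rtbcount, rtb_softlimit;
	uint32_t	btimer, itimer, rtbtimer;
};

/* A non-default dquot over a soft limit must have its timer running. */
static bool dquot_limits_ok(const struct toy_dquot *dq)
{
	if (!dq->id)	/* id 0 holds the default limits, nothing to enforce */
		return true;

	if (dq->blk_softlimit && dq->bcount > dq->blk_softlimit && !dq->btimer)
		return false;
	if (dq->ino_softlimit && dq->icount > dq->ino_softlimit && !dq->itimer)
		return false;
	if (dq->rtb_softlimit && dq->rtbcount > dq->rtb_softlimit && !dq->rtbtimer)
		return false;
	return true;
}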
+ */ +int +xfs_dquot_repair( + struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, + xfs_dqid_t id, + uint type) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)ddq; - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); /* * Typically, a repair is only requested by quotacheck. */ ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); memset(d, 0, sizeof(xfs_dqblk_t)); d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -163,7 +129,7 @@ xfs_dqcheck( XFS_DQUOT_CRC_OFF); } - return errs; + return 0; } STATIC bool @@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc( return true; } -STATIC bool +STATIC xfs_failaddr_t xfs_dquot_buf_verify( struct xfs_mount *mp, - struct xfs_buf *bp, - int warn) + struct xfs_buf *bp) { struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_failaddr_t fa; xfs_dqid_t id = 0; int ndquots; int i; @@ -228,33 +194,43 @@ xfs_dquot_buf_verify( */ for (i = 0; i < ndquots; i++) { struct xfs_disk_dquot *ddq; - int error; ddq = &d[i].dd_diskdq; if (i == 0) id = be32_to_cpu(ddq->d_id); - error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__); - if (error) - return false; + fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0); + if (fa) + return fa; } - return true; + + return NULL; +} + +static xfs_failaddr_t +xfs_dquot_buf_verify_struct( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + return xfs_dquot_buf_verify(mp, bp); } static void xfs_dquot_buf_read_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (!xfs_dquot_buf_verify_crc(mp, bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); + } } /* @@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp) || - !xfs_dquot_buf_verify(mp, bp, 0)) { + xfs_dquot_buf_verify(mp, bp) != NULL) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; } @@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify( */ static void xfs_dquot_buf_write_verify( - struct xfs_buf *bp) + struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; - if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); - return; - } + fa = xfs_dquot_buf_verify(mp, bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); } const struct xfs_buf_ops xfs_dquot_buf_ops = { .name = "xfs_dquot", .verify_read = xfs_dquot_buf_read_verify, .verify_write = xfs_dquot_buf_write_verify, + .verify_struct = xfs_dquot_buf_verify_struct, }; const struct xfs_buf_ops xfs_dquot_buf_ra_ops = { diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index b90924104596..faf1a4edd618 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks { #define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL) #define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL) +/* + * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than + * 16MB or larger than 1TB. 
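The AG size limits added to xfs_fs.h just below are expressed in bytes, so a checker that works in sb_agblocks units has to scale them by the filesystem block size. For example, with 4 KiB blocks the permitted range works out to 4096 through 268435456 blocks per AG; a tiny illustration of that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define MIN_AG_BYTES	(1ULL << 24)	/* 16 MiB */
#define MAX_AG_BYTES	(1ULL << 40)	/* 1 TiB */

int main(void)
{
	uint32_t blocklog = 12;			/* 4096-byte blocks */
	uint64_t min_agblocks = MIN_AG_BYTES >> blocklog;
	uint64_t max_agblocks = MAX_AG_BYTES >> blocklog;

	printf("sb_agblocks must be in [%llu, %llu]\n",
	       (unsigned long long)min_agblocks,
	       (unsigned long long)max_agblocks);
	return 0;
}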
+ */ +#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */ +#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */ + /* keep the maximum size under 2^31 by a small amount */ #define XFS_MAX_LOG_BYTES \ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index de3f04a98656..0e2cf5f0be1f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -920,8 +920,7 @@ STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ - umode_t mode, /* bits set to indicate file type */ - int okalloc) /* ok to allocate more space */ + umode_t mode) /* bits set to indicate file type */ { xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ @@ -978,9 +977,6 @@ xfs_ialloc_ag_select( return agno; } - if (!okalloc) - goto nextag; - if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, tp, agno, flags); if (error) @@ -1680,7 +1676,6 @@ xfs_dialloc( struct xfs_trans *tp, xfs_ino_t parent, umode_t mode, - int okalloc, struct xfs_buf **IO_agbp, xfs_ino_t *inop) { @@ -1692,6 +1687,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + int okalloc = 1; if (*IO_agbp) { /* @@ -1707,7 +1703,7 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + start_agno = xfs_ialloc_ag_select(tp, parent, mode); if (start_agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; @@ -2495,7 +2491,7 @@ xfs_check_agi_unlinked( #define xfs_check_agi_unlinked(agi) #endif -static bool +static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { @@ -2504,28 +2500,28 @@ xfs_agi_verify( if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) - return false; + return __this_address; } /* * Validate the magic number of the agi block. */ if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC)) - return false; + return __this_address; if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) - return false; + return __this_address; if (be32_to_cpu(agi->agi_level) < 1 || be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS) - return false; + return __this_address; if (xfs_sb_version_hasfinobt(&mp->m_sb) && (be32_to_cpu(agi->agi_free_level) < 1 || be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS)) - return false; + return __this_address; /* * during growfs operations, the perag is not fully initialised, @@ -2534,10 +2530,10 @@ xfs_agi_verify( * so we can detect and avoid this problem. 
*/ if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) - return false; + return __this_address; xfs_check_agi_unlinked(agi); - return true; + return NULL; } static void @@ -2545,28 +2541,29 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, - XFS_ERRTAG_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_agi_verify(bp); + if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI)) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; - if (!xfs_agi_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_agi_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -2582,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = { .name = "xfs_agi", .verify_read = xfs_agi_read_verify, .verify_write = xfs_agi_write_verify, + .verify_struct = xfs_agi_verify, }; /* @@ -2755,3 +2753,102 @@ xfs_verify_dir_ino( return false; return xfs_verify_ino(mp, ino); } + +/* Is there an inode record covering a given range of inode numbers? */ +int +xfs_ialloc_has_inode_record( + struct xfs_btree_cur *cur, + xfs_agino_t low, + xfs_agino_t high, + bool *exists) +{ + struct xfs_inobt_rec_incore irec; + xfs_agino_t agino; + uint16_t holemask; + int has_record; + int i; + int error; + + *exists = false; + error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record); + while (error == 0 && has_record) { + error = xfs_inobt_get_rec(cur, &irec, &has_record); + if (error || irec.ir_startino > high) + break; + + agino = irec.ir_startino; + holemask = irec.ir_holemask; + for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1, + i++, agino += XFS_INODES_PER_HOLEMASK_BIT) { + if (holemask & 1) + continue; + if (agino + XFS_INODES_PER_HOLEMASK_BIT > low && + agino <= high) { + *exists = true; + return 0; + } + } + + error = xfs_btree_increment(cur, 0, &has_record); + } + return error; +} + +/* Is there an inode record covering a given extent? */ +int +xfs_ialloc_has_inodes_at_extent( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + xfs_agino_t low; + xfs_agino_t high; + + low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0); + high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1; + + return xfs_ialloc_has_inode_record(cur, low, high, exists); +} + +struct xfs_ialloc_count_inodes { + xfs_agino_t count; + xfs_agino_t freecount; +}; + +/* Record inode counts across all inobt records. */ +STATIC int +xfs_ialloc_count_inodes_rec( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_inobt_rec_incore irec; + struct xfs_ialloc_count_inodes *ci = priv; + + xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); + ci->count += irec.ir_count; + ci->freecount += irec.ir_freecount; + + return 0; +} + +/* Count allocated and free inodes under an inobt. 
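xfs_ialloc_has_inode_record walks inobt records upward from an XFS_LOOKUP_LE starting point and, within each record, steps through the holemask in chunks of XFS_INODES_PER_HOLEMASK_BIT inodes, reporting a hit when a populated chunk intersects [low, high]. The per-record chunk walk in isolation, with the 16-bit holemask and 4-inodes-per-bit layout of a 64-inode chunk hardcoded for the sketch:

#include <stdbool.h>
#include <stdint.h>

#define HOLEMASK_BITS	16
#define INODES_PER_BIT	4

/* Does a single inobt record overlap the inode number range [low, high]? */
static bool record_overlaps(uint32_t startino, uint16_t holemask,
			    uint32_t low, uint32_t high)
{
	uint32_t agino = startino;
	int i;

	for (i = 0; i < HOLEMASK_BITS;
	     holemask >>= 1, i++, agino += INODES_PER_BIT) {
		if (holemask & 1)	/* this chunk is a hole, no inodes */
			continue;
		if (agino + INODES_PER_BIT > low && agino <= high)
			return true;
	}
	return false;
}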
*/ +int +xfs_ialloc_count_inodes( + struct xfs_btree_cur *cur, + xfs_agino_t *count, + xfs_agino_t *freecount) +{ + struct xfs_ialloc_count_inodes ci = {0}; + int error; + + ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); + if (error) + return error; + + *count = ci.count; + *freecount = ci.freecount; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index d2bdcd5e7312..c5402bb4ce0c 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -81,7 +81,6 @@ xfs_dialloc( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g. inode header */ xfs_ino_t *inop); /* inode number allocated */ @@ -171,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); +int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); +int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low, + xfs_agino_t high, bool *exists); +int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, + xfs_agino_t *freecount); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 317caba9faa6..af197a5f3a82 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -141,21 +141,42 @@ xfs_finobt_alloc_block( union xfs_btree_ptr *new, int *stat) { + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_alloc_block(cur, start, new, stat); return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_METADATA); } STATIC int -xfs_inobt_free_block( +__xfs_inobt_free_block( struct xfs_btree_cur *cur, - struct xfs_buf *bp) + struct xfs_buf *bp, + enum xfs_ag_resv_type resv) { struct xfs_owner_info oinfo; xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); return xfs_free_extent(cur->bc_tp, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, - &oinfo, XFS_AG_RESV_NONE); + &oinfo, resv); +} + +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + if (cur->bc_mp->m_inotbt_nores) + return xfs_inobt_free_block(cur, bp); + return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); } STATIC int @@ -250,12 +271,13 @@ xfs_inobt_diff_two_keys( be32_to_cpu(k2->inobt.ir_startino); } -static int +static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; unsigned int level; /* @@ -271,20 +293,21 @@ xfs_inobt_verify( switch (block->bb_magic) { case cpu_to_be32(XFS_IBT_CRC_MAGIC): case cpu_to_be32(XFS_FIBT_CRC_MAGIC): - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; /* fall through */ case cpu_to_be32(XFS_IBT_MAGIC): case cpu_to_be32(XFS_FIBT_MAGIC): break; default: - return 0; + return NULL; } /* level verification */ level = be16_to_cpu(block->bb_level); if (level >= 
mp->m_in_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); } @@ -293,25 +316,30 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_inobt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_inobt_write_verify( struct xfs_buf *bp) { - if (!xfs_inobt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_inobt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -322,6 +350,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = { .name = "xfs_inobt", .verify_read = xfs_inobt_read_verify, .verify_write = xfs_inobt_write_verify, + .verify_struct = xfs_inobt_verify, }; STATIC int @@ -372,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, .alloc_block = xfs_finobt_alloc_block, - .free_block = xfs_inobt_free_block, + .free_block = xfs_finobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6b7989038d75..4fe17b368316 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -32,6 +32,8 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" +#include <linux/iversion.h> + /* * Check that none of the inode's in the buffer have a next * unlinked field of 0. 
@@ -113,8 +115,7 @@ xfs_inode_buf_verify( return; } - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -264,7 +265,8 @@ xfs_inode_from_disk( to->di_flags = be16_to_cpu(from->di_flags); if (to->di_version == 3) { - inode->i_version = be64_to_cpu(from->di_changecount); + inode_set_iversion_queried(inode, + be64_to_cpu(from->di_changecount)); to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); @@ -314,7 +316,7 @@ xfs_inode_to_disk( to->di_flags = cpu_to_be16(from->di_flags); if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(inode->i_version); + to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); @@ -381,7 +383,7 @@ xfs_log_dinode_to_disk( } } -bool +xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, xfs_ino_t ino, @@ -390,53 +392,122 @@ xfs_dinode_verify( uint16_t mode; uint16_t flags; uint64_t flags2; + uint64_t di_size; if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) - return false; + return __this_address; + + /* Verify v3 integrity information first */ + if (dip->di_version >= 3) { + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return __this_address; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, + XFS_DINODE_CRC_OFF)) + return __this_address; + if (be64_to_cpu(dip->di_ino) != ino) + return __this_address; + if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) + return __this_address; + } /* don't allow invalid i_size */ - if (be64_to_cpu(dip->di_size) & (1ULL << 63)) - return false; + di_size = be64_to_cpu(dip->di_size); + if (di_size & (1ULL << 63)) + return __this_address; mode = be16_to_cpu(dip->di_mode); if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) - return false; + return __this_address; /* No zero-length symlinks/dirs. */ - if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) - return false; + if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) + return __this_address; + + /* Fork checks carried over from xfs_iformat_fork */ + if (mode && + be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks)) + return __this_address; + + if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) + return __this_address; + + flags = be16_to_cpu(dip->di_flags); + + if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) + return __this_address; + + /* Do we have appropriate data fork formats for the mode? */ + switch (mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + if (dip->di_format != XFS_DINODE_FMT_DEV) + return __this_address; + break; + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (dip->di_format) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (S_ISREG(mode)) + return __this_address; + if (di_size > XFS_DFORK_DSIZE(dip, mp)) + return __this_address; + /* fall through */ + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + break; + case 0: + /* Uninitialized inode ok. 
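The expanded xfs_dinode_verify pulls the mode/fork-format consistency rules out of xfs_iformat_fork (a later hunk removes them there): special files must use XFS_DINODE_FMT_DEV, directories and symlinks may be LOCAL, EXTENTS or BTREE, regular files may not be LOCAL, and a zero mode (a freed or never-used inode) is accepted. The mapping as a standalone predicate; the enum is a toy, with userspace S_IF* constants standing in for the on-disk mode bits:

#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>

enum dinode_fmt { FMT_DEV, FMT_LOCAL, FMT_EXTENTS, FMT_BTREE, FMT_UUID };

/* Is this data-fork format legal for the given file mode? */
static bool fork_format_ok(mode_t mode, enum dinode_fmt fmt)
{
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		return fmt == FMT_DEV;
	case S_IFREG:
		/* no local-format regular files */
		return fmt == FMT_EXTENTS || fmt == FMT_BTREE;
	case S_IFLNK:
	case S_IFDIR:
		return fmt == FMT_LOCAL || fmt == FMT_EXTENTS ||
		       fmt == FMT_BTREE;
	case 0:
		return true;	/* uninitialized inode is fine */
	default:
		return false;
	}
}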
*/ + break; + default: + return __this_address; + } + + if (XFS_DFORK_Q(dip)) { + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + break; + default: + return __this_address; + } + } /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) - return true; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - XFS_DINODE_CRC_OFF)) - return false; - if (be64_to_cpu(dip->di_ino) != ino) - return false; - if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return NULL; - flags = be16_to_cpu(dip->di_flags); flags2 = be64_to_cpu(dip->di_flags2); /* don't allow reflink/cowextsize if we don't have reflink */ if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && !xfs_sb_version_hasreflink(&mp->m_sb)) - return false; + return __this_address; + + /* only regular files get reflink */ + if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) + return __this_address; /* don't let reflink and realtime mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) - return false; + return __this_address; /* don't let reflink and dax mix */ if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return false; + return __this_address; - return true; + return NULL; } void @@ -476,6 +547,7 @@ xfs_iread( { xfs_buf_t *bp; xfs_dinode_t *dip; + xfs_failaddr_t fa; int error; /* @@ -507,11 +579,10 @@ xfs_iread( return error; /* even unallocated inodes are verified */ - if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { - xfs_alert(mp, "%s: validation failed for inode %lld", - __func__, ip->i_ino); - - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); + fa = xfs_dinode_verify(mp, ip->i_ino, dip); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, + sizeof(*dip), fa); error = -EFSCORRUPTED; goto out_brelse; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index a9c97a356c30..8a5e1da52d74 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -82,7 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, - struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, + struct xfs_dinode *dip); #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index c79a1616b79d..866d2861c625 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -35,6 +35,8 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" +#include "xfs_attr_leaf.h" +#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; @@ -62,69 +64,11 @@ xfs_iformat_fork( int error = 0; xfs_fsize_t di_size; - if (unlikely(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents) > - be64_to_cpu(dip->di_nblocks))) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", - (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_nextents) + - be16_to_cpu(dip->di_anextents)), - (unsigned long long) - be64_to_cpu(dip->di_nblocks)); - XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 
0x%x.", - (unsigned long long)ip->i_ino, - dip->di_forkoff); - XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && - !ip->i_mount->m_rtdev_targp)) { - xfs_warn(ip->i_mount, - "corrupt dinode %Lu, has realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, wrong file type for reflink.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - - if (unlikely(xfs_is_reflink_inode(ip) && - (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) { - xfs_warn(ip->i_mount, - "corrupt dinode %llu, has reflink+realtime flag set.", - ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(reflink)", - XFS_ERRLEVEL_LOW, ip->i_mount, dip); - return -EFSCORRUPTED; - } - switch (inode->i_mode & S_IFMT) { case S_IFIFO: case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { - XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } ip->i_d.di_size = 0; inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); break; @@ -134,32 +78,7 @@ xfs_iformat_fork( case S_IFDIR: switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (local format for regular file).", - (unsigned long long) ip->i_ino); - XFS_CORRUPTION_ERROR("xfs_iformat(4)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - di_size = be64_to_cpu(dip->di_size); - if (unlikely(di_size < 0 || - di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %Ld for local inode).", - (unsigned long long) ip->i_ino, - (long long) di_size); - XFS_CORRUPTION_ERROR("xfs_iformat(5)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - return -EFSCORRUPTED; - } - size = (int)di_size; error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); break; @@ -170,28 +89,16 @@ xfs_iformat_fork( error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); break; default: - XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, - ip->i_mount); return -EFSCORRUPTED; } break; default: - XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); return -EFSCORRUPTED; } if (error) return error; - /* Check inline dir contents. 
*/ - if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) { - error = xfs_dir2_sf_verify(ip); - if (error) { - xfs_idestroy_fork(ip, XFS_DATA_FORK); - return error; - } - } - if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -208,18 +115,6 @@ xfs_iformat_fork( atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); - if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad attr fork size %Ld).", - (unsigned long long) ip->i_ino, - (long long) size); - XFS_CORRUPTION_ERROR("xfs_iformat(8)", - XFS_ERRLEVEL_LOW, - ip->i_mount, dip); - error = -EFSCORRUPTED; - break; - } - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); break; case XFS_DINODE_FMT_EXTENTS: @@ -403,6 +298,7 @@ xfs_iformat_btree( */ if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= XFS_IFORK_MAXEXT(ip, whichfork) || + nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || @@ -827,3 +723,45 @@ xfs_ifork_init_cow( ip->i_cformat = XFS_DINODE_FMT_EXTENTS; ip->i_cnextents = 0; } + +/* Default fork content verifiers. */ +struct xfs_ifork_ops xfs_default_ifork_ops = { + .verify_attr = xfs_attr_shortform_verify, + .verify_dir = xfs_dir2_sf_verify, + .verify_symlink = xfs_symlink_shortform_verify, +}; + +/* Verify the inline contents of the data fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_data( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* Non-local data fork, we're done. */ + if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return NULL; + + /* Check the inline data fork if there is one. */ + switch (VFS_I(ip)->i_mode & S_IFMT) { + case S_IFDIR: + return ops->verify_dir(ip); + case S_IFLNK: + return ops->verify_symlink(ip); + default: + return NULL; + } +} + +/* Verify the inline contents of the attr fork of an inode. */ +xfs_failaddr_t +xfs_ifork_verify_attr( + struct xfs_inode *ip, + struct xfs_ifork_ops *ops) +{ + /* There has to be an attr fork allocated if aformat is local. */ + if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) + return NULL; + if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) + return __this_address; + return ops->verify_attr(ip); +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index b9f0098e33b8..dd8aba0dd119 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -186,4 +186,18 @@ extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); +typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); + +struct xfs_ifork_ops { + xfs_ifork_verifier_t verify_symlink; + xfs_ifork_verifier_t verify_dir; + xfs_ifork_verifier_t verify_attr; +}; +extern struct xfs_ifork_ops xfs_default_ifork_ops; + +xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); +xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, + struct xfs_ifork_ops *ops); + #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index c10597973333..cc4cbe290939 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res( * the maximum one in terms of the pre-calculated values which were done * at mount time. 
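The new xfs_ifork_ops table lets different callers plug in their own checkers for inline fork contents, with xfs_default_ifork_ops wiring up the shortform attr, dir and symlink verifiers; xfs_ifork_verify_data only dispatches when the data fork is in local format and chooses the directory or symlink verifier by file type. A compact model of that dispatch, with hypothetical toy types:

#include <stddef.h>
#include <sys/types.h>
#include <sys/stat.h>

typedef const void *failaddr_t;

struct toy_inode {
	mode_t	mode;
	int	data_fork_local;	/* data fork stored inline? */
};

struct ifork_ops {
	failaddr_t (*verify_dir)(struct toy_inode *);
	failaddr_t (*verify_symlink)(struct toy_inode *);
	failaddr_t (*verify_attr)(struct toy_inode *);
};

/* Only inline (local-format) data forks have contents to verify here. */
static failaddr_t ifork_verify_data(struct toy_inode *ip,
				    const struct ifork_ops *ops)
{
	if (!ip->data_fork_local)
		return NULL;

	switch (ip->mode & S_IFMT) {
	case S_IFDIR:
		return ops->verify_dir(ip);
	case S_IFLNK:
		return ops->verify_symlink(ip);
	default:
		return NULL;
	}
}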
*/ -STATIC void +void xfs_log_get_max_trans_res( struct xfs_mount *mp, struct xfs_trans_res *max_resp) diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index d69c772271cb..bb1b13a9b5f4 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -112,8 +112,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ -#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ -#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ #define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ @@ -153,8 +151,11 @@ typedef uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) -extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, const char *str); +extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, + struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type, + uint flags); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); +extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq, + xfs_dqid_t id, uint type); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..bee68c23d612 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. 
*/ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } @@ -1710,3 +1696,22 @@ out_cursor: xfs_trans_brelse(tp, agbp); goto out_trans; } + +/* Is there a record covering a given extent? */ +int +xfs_refcount_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.rc.rc_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.rc.rc_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index eafb9d1f3b37..2a731ac68fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res) return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD; } +extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, + xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3c59dd3d58d7..8479769e470d 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys( be32_to_cpu(k2->refc.rc_startblock); } -STATIC bool +STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_refcount_level) - return false; + return __this_address; } else if (level >= mp->m_refc_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); } @@ -254,25 +256,30 @@ STATIC void xfs_refcountbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_refcountbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_refcountbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) 
trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } STATIC void xfs_refcountbt_write_verify( struct xfs_buf *bp) { - if (!xfs_refcountbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_refcountbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -283,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = { .name = "xfs_refcountbt", .verify_read = xfs_refcountbt_read_verify, .verify_write = xfs_refcountbt_write_verify, + .verify_struct = xfs_refcountbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..79822cf6ebe3 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range( } /* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. + */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + +/* * Find the extent in the rmap btree and remove it. * * The record we find should always be an exact match for the extent that we're @@ -444,33 +489,40 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. + */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); - - /* Make sure the owner matches what we expect to find in the tree. 
*/ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Check the offset, if necessary. */ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ @@ -664,6 +716,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent @@ -2334,3 +2387,70 @@ xfs_rmap_compare( else return 0; } + +/* Is there a record covering a given extent? */ +int +xfs_rmap_has_record( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + bool *exists) +{ + union xfs_btree_irec low; + union xfs_btree_irec high; + + memset(&low, 0, sizeof(low)); + low.r.rm_startblock = bno; + memset(&high, 0xFF, sizeof(high)); + high.r.rm_startblock = bno + len - 1; + + return xfs_btree_has_record(cur, &low, &high, exists); +} + +/* + * Is there a record for this owner completely covering a given physical + * extent? If so, *has_rmap will be set to true. If there is no record + * or the record only covers part of the range, we set *has_rmap to false. + * This function doesn't perform range lookups or offset checks, so it is + * not suitable for checking data fork blocks. + */ +int +xfs_rmap_record_exists( + struct xfs_btree_cur *cur, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + bool *has_rmap) +{ + uint64_t owner; + uint64_t offset; + unsigned int flags; + int has_record; + struct xfs_rmap_irec irec; + int error; + + xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); + ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) || + (flags & XFS_RMAP_BMBT_BLOCK)); + + error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, + &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + error = xfs_rmap_get_rec(cur, &irec, &has_record); + if (error) + return error; + if (!has_record) { + *has_rmap = false; + return 0; + } + + *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno && + irec.rm_startblock + irec.rm_blockcount >= bno + len); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..380e53be98d5 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. 
*/ @@ -219,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a, union xfs_btree_rec; int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); +int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); +int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno, + xfs_extlen_t len, struct xfs_owner_info *oinfo, + bool *has_rmap); #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9d9c9192584c..e829c3e489ea 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys( return 0; } -static bool +static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; + xfs_failaddr_t fa; unsigned int level; /* @@ -325,19 +326,20 @@ xfs_rmapbt_verify( * in this case. */ if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC)) - return false; + return __this_address; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) - return false; - if (!xfs_btree_sblock_v5hdr_verify(bp)) - return false; + return __this_address; + fa = xfs_btree_sblock_v5hdr_verify(bp); + if (fa) + return fa; level = be16_to_cpu(block->bb_level); if (pag && pag->pagf_init) { if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) - return false; + return __this_address; } else if (level >= mp->m_rmap_maxlevels) - return false; + return __this_address; return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); } @@ -346,25 +348,30 @@ static void xfs_rmapbt_read_verify( struct xfs_buf *bp) { + xfs_failaddr_t fa; + if (!xfs_btree_sblock_verify_crc(bp)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_rmapbt_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_rmapbt_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } - if (bp->b_error) { + if (bp->b_error) trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_verifier_error(bp); - } } static void xfs_rmapbt_write_verify( struct xfs_buf *bp) { - if (!xfs_rmapbt_verify(bp)) { + xfs_failaddr_t fa; + + fa = xfs_rmapbt_verify(bp); + if (fa) { trace_xfs_btree_corrupt(bp, _RET_IP_); - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } xfs_btree_sblock_calc_crc(bp); @@ -375,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = { .name = "xfs_rmapbt", .verify_read = xfs_rmapbt_read_verify, .verify_write = xfs_rmapbt_write_verify, + .verify_struct = xfs_rmapbt_verify, }; STATIC int diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 3fb29a5ea915..106be2d0bb88 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1097,3 +1097,24 @@ xfs_verify_rtbno( { return rtbno < mp->m_sb.sb_rblocks; } + +/* Is the given extent all free? 
*/ +int +xfs_rtalloc_extent_is_free( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_rtblock_t start, + xfs_extlen_t len, + bool *is_free) +{ + xfs_rtblock_t end; + int matches; + int error; + + error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches); + if (error) + return error; + + *is_free = matches; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 9b5aae2bcc0b..a55f7a45fa78 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -40,6 +40,8 @@ #include "xfs_rmap_btree.h" #include "xfs_bmap.h" #include "xfs_refcount_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -116,6 +118,9 @@ xfs_mount_validate_sb( bool check_inprogress, bool check_version) { + uint32_t agcount = 0; + uint32_t rem; + if (sbp->sb_magicnum != XFS_SB_MAGIC) { xfs_warn(mp, "bad magic number"); return -EWRONGFS; @@ -226,6 +231,13 @@ xfs_mount_validate_sb( return -EINVAL; } + /* Compute agcount for this number of dblocks and agblocks */ + if (sbp->sb_agblocks) { + agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); + if (rem) + agcount++; + } + /* * More sanity checking. Most of these were stolen directly from * xfs_repair. @@ -250,6 +262,10 @@ xfs_mount_validate_sb( sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || + XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || + sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || + agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || @@ -640,11 +656,10 @@ xfs_sb_read_verify( error = xfs_sb_verify(bp, true); out_error: - if (error) { + if (error == -EFSCORRUPTED || error == -EFSBADCRC) + xfs_verifier_error(bp, error, __this_address); + else if (error) xfs_buf_ioerror(bp, error); - if (error == -EFSCORRUPTED || error == -EFSBADCRC) - xfs_verifier_error(bp); - } } /* @@ -673,13 +688,12 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; int error; error = xfs_sb_verify(bp, false); if (error) { - xfs_buf_ioerror(bp, error); - xfs_verifier_error(bp); + xfs_verifier_error(bp, error, __this_address); return; } @@ -876,3 +890,88 @@ xfs_sync_sb( xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } + +int +xfs_fs_geometry( + struct xfs_sb *sbp, + struct xfs_fsop_geom *geo, + int struct_version) +{ + memset(geo, 0, sizeof(struct xfs_fsop_geom)); + + geo->blocksize = sbp->sb_blocksize; + geo->rtextsize = sbp->sb_rextsize; + geo->agblocks = sbp->sb_agblocks; + geo->agcount = sbp->sb_agcount; + geo->logblocks = sbp->sb_logblocks; + geo->sectsize = sbp->sb_sectsize; + geo->inodesize = sbp->sb_inodesize; + geo->imaxpct = sbp->sb_imax_pct; + geo->datablocks = sbp->sb_dblocks; + geo->rtblocks = sbp->sb_rblocks; + geo->rtextents = sbp->sb_rextents; + geo->logstart = sbp->sb_logstart; + BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); + memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); + + if (struct_version < 2) + return 0; + + geo->sunit = sbp->sb_unit; + geo->swidth = sbp->sb_width; + + if (struct_version < 3) + return 0; 
+ + geo->version = XFS_FSOP_GEOM_VERSION; + geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | + XFS_FSOP_GEOM_FLAGS_DIRV2; + if (xfs_sb_version_hasattr(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; + if (xfs_sb_version_hasquota(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; + if (xfs_sb_version_hasalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; + if (xfs_sb_version_hasdalign(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; + if (xfs_sb_version_hasextflgbit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG; + if (xfs_sb_version_hassector(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; + if (xfs_sb_version_hasasciici(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; + if (xfs_sb_version_haslazysbcount(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; + if (xfs_sb_version_hasattr2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; + if (xfs_sb_version_hasprojid32bit(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; + if (xfs_sb_version_hascrc(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; + if (xfs_sb_version_hasftype(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; + if (xfs_sb_version_hasfinobt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; + if (xfs_sb_version_hassparseinodes(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; + if (xfs_sb_version_hasrmapbt(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; + if (xfs_sb_version_hasreflink(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; + if (xfs_sb_version_hassector(sbp)) + geo->logsectsize = sbp->sb_logsectsize; + else + geo->logsectsize = BBSIZE; + geo->rtsectsize = sbp->sb_blocksize; + geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); + + if (struct_version < 4) + return 0; + + if (xfs_sb_version_haslogv2(sbp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; + + geo->logsunit = sbp->sb_logsunit; + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 961e6475a309..63dcd2a1a657 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -34,4 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +#define XFS_FS_GEOM_MAX_STRUCT_VER (4) +extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, + int struct_version); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c6f4eb46fe26..d0b84da0cb1e 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -76,6 +76,9 @@ struct xfs_log_item_desc { int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); int xfs_log_calc_minimum_size(struct xfs_mount *); +struct xfs_trans_res; +void xfs_log_get_max_trans_res(struct xfs_mount *mp, + struct xfs_trans_res *max_resp); /* * Values for t_flags. 
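The struct_version argument to the newly shared xfs_fs_geometry() above gates which fields are filled (v2 adds stripe geometry, v3 adds the feature flags and directory/sector sizes, v4 adds the log stripe unit), so callers are expected to cap the requested version at XFS_FS_GEOM_MAX_STRUCT_VER. A minimal caller sketch, assuming a hypothetical wrapper name xfs_example_fill_geometry(); only xfs_fs_geometry() and XFS_FS_GEOM_MAX_STRUCT_VER come from the hunks above:

	/*
	 * Sketch only, not part of the patch: fill a geometry structure for
	 * the newest layout this kernel understands, clamping the caller's
	 * requested version.
	 */
	STATIC int
	xfs_example_fill_geometry(
		struct xfs_mount	*mp,
		struct xfs_fsop_geom	*geo,
		int			struct_version)
	{
		/* Never report a newer layout than we know how to fill. */
		if (struct_version > XFS_FS_GEOM_MAX_STRUCT_VER)
			struct_version = XFS_FS_GEOM_MAX_STRUCT_VER;

		/* Fills only the fields defined for struct_version. */
		return xfs_fs_geometry(&mp->m_sb, geo, struct_version);
	}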
@@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); +xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index c484877129a0..5ef5f354587e 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -98,7 +98,7 @@ xfs_symlink_hdr_ok( return true; } -static bool +static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { @@ -106,22 +106,22 @@ xfs_symlink_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; + return __this_address; if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC)) - return false; + return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) - return false; + return __this_address; if (bp->b_bn != be64_to_cpu(dsl->sl_blkno)) - return false; + return __this_address; if (be32_to_cpu(dsl->sl_offset) + be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN) - return false; + return __this_address; if (dsl->sl_owner == 0) - return false; + return __this_address; if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn))) - return false; + return __this_address; - return true; + return NULL; } static void @@ -129,18 +129,19 @@ xfs_symlink_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) - xfs_buf_ioerror(bp, -EFSBADCRC); - else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, -EFSCORRUPTED); - - if (bp->b_error) - xfs_verifier_error(bp); + xfs_verifier_error(bp, -EFSBADCRC, __this_address); + else { + fa = xfs_symlink_verify(bp); + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); + } } static void @@ -148,15 +149,16 @@ xfs_symlink_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; + struct xfs_buf_log_item *bip = bp->b_log_item; + xfs_failaddr_t fa; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_symlink_verify(bp)) { - xfs_buf_ioerror(bp, -EFSCORRUPTED); - xfs_verifier_error(bp); + fa = xfs_symlink_verify(bp); + if (fa) { + xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } @@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = { .name = "xfs_symlink", .verify_read = xfs_symlink_read_verify, .verify_write = xfs_symlink_write_verify, + .verify_struct = xfs_symlink_verify, }; void @@ -207,3 +210,37 @@ xfs_symlink_local_to_remote( xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + ifp->if_bytes - 1); } + +/* Verify the consistency of an inline symlink. */ +xfs_failaddr_t +xfs_symlink_shortform_verify( + struct xfs_inode *ip) +{ + char *sfp; + char *endp; + struct xfs_ifork *ifp; + int size; + + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (char *)ifp->if_u1.if_data; + size = ifp->if_bytes; + endp = sfp + size; + + /* Zero length symlinks can exist while we're deleting a remote one. */ + if (size == 0) + return NULL; + + /* No negative sizes or overly long symlink targets. 
*/ + if (size < 0 || size > XFS_SYMLINK_MAXLEN) + return __this_address; + + /* No NULLs in the target either. */ + if (memchr(sfp, 0, size - 1)) + return __this_address; + + /* We /did/ null-terminate the buffer, right? */ + if (*endp != 0) + return __this_address; + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 6bd916bd35e2..5f17641f040f 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -34,6 +34,9 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" +#define _ALLOC true +#define _FREE false + /* * A buffer has a format structure overhead in the log in addition * to the data, so we need to take this into account when reserving @@ -132,43 +135,77 @@ xfs_calc_inode_res( } /* - * The free inode btree is a conditional feature and the log reservation - * requirements differ slightly from that of the traditional inode allocation - * btree. The finobt tracks records for inode chunks with at least one free - * inode. A record can be removed from the tree for an inode allocation - * or free and thus the finobt reservation is unconditional across: + * Inode btree record insertion/removal modifies the inode btree and free space + * btrees (since the inobt does not use the agfl). This requires the following + * reservation: * - * - inode allocation - * - inode free - * - inode chunk allocation + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (max depth - 1) * block size * - * The 'modify' param indicates to include the record modification scenario. The - * 'alloc' param indicates to include the reservation for free space btree - * modifications on behalf of finobt modifications. This is required only for - * transactions that do not already account for free space btree modifications. + * The caller must account for SB and AG header modifications, etc. + */ +STATIC uint +xfs_calc_inobt_res( + struct xfs_mount *mp) +{ + return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); +} + +/* + * The free inode btree is a conditional feature. The behavior differs slightly + * from that of the traditional inode btree in that the finobt tracks records + * for inode chunks with at least one free inode. A record can be removed from + * the tree during individual inode allocation. Therefore the finobt + * reservation is unconditional for both the inode chunk allocation and + * individual inode allocation (modify) cases. * - * the free inode btree: max depth * block size - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the free inode btree entry: block size + * Behavior aside, the reservation for finobt modification is equivalent to the + * traditional inobt: cover a full finobt shape change plus block allocation. */ STATIC uint xfs_calc_finobt_res( - struct xfs_mount *mp, - int alloc, - int modify) + struct xfs_mount *mp) { - uint res; - if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return 0; - res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)); - if (alloc) - res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); - if (modify) - res += (uint)XFS_FSB_TO_B(mp, 1); + return xfs_calc_inobt_res(mp); +} +/* + * Calculate the reservation required to allocate or free an inode chunk. 
This + * includes: + * + * the allocation btrees: 2 trees * (max depth - 1) * block size + * the inode chunk: m_ialloc_blks * N + * + * The size N of the inode chunk reservation depends on whether it is for + * allocation or free and which type of create transaction is in use. An inode + * chunk free always invalidates the buffers and only requires reservation for + * headers (N == 0). An inode chunk allocation requires a chunk sized + * reservation on v4 and older superblocks to initialize the chunk. No chunk + * reservation is required for allocation on v5 supers, which use ordered + * buffers to initialize. + */ +STATIC uint +xfs_calc_inode_chunk_res( + struct xfs_mount *mp, + bool alloc) +{ + uint res, size = 0; + + res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); + if (alloc) { + /* icreate tx uses ordered buffers */ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return res; + size = XFS_FSB_TO_B(mp, 1); + } + + res += xfs_calc_buf_res(mp->m_ialloc_blks, size); return res; } @@ -232,8 +269,6 @@ xfs_calc_write_reservation( * the super block to reflect the freed blocks: sector size * worst case split in allocation btrees per extent assuming 4 extents: * 4 exts * 2 trees * (2 * max depth - 1) * block size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size */ STATIC uint xfs_calc_itruncate_reservation( @@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation( XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(5, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0))); + XFS_FSB_TO_B(mp, 1)))); } /* @@ -282,13 +312,14 @@ xfs_calc_rename_reservation( * For removing an inode from unlinked list at first, we can modify: * the agi hash list and counters: sector size * the on disk inode before ours in the agi hash list: inode cluster size + * the on disk inode in the agi hash list: inode cluster size */ STATIC uint xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -320,13 +351,13 @@ xfs_calc_link_reservation( /* * For adding an inode to unlinked list we can modify: * the agi hash list: sector size - * the unlinked inode: inode size + * the on disk inode: inode cluster size */ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_inode_res(mp, 1); + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); } /* @@ -379,45 +410,16 @@ xfs_calc_create_resv_modify( xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + (uint)XFS_FSB_TO_B(mp, 1) + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 1, 1); -} - -/* - * For create we can allocate some inodes giving: - * the agi and agf of the ag getting the new inodes: 2 * sectorsize - * the superblock for the nlink flag: sector size - * the inode blocks allocated: mp->m_ialloc_blks * blocksize - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - */ -STATIC uint -xfs_calc_create_resv_alloc( - struct xfs_mount *mp) -{ - return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + - mp->m_sb.sb_sectsize + - 
xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); -} - -STATIC uint -__xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - return XFS_DQUOT_LOGRES(mp) + - MAX(xfs_calc_create_resv_alloc(mp), - xfs_calc_create_resv_modify(mp)); + xfs_calc_finobt_res(mp); } /* * For icreate we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size - * the finobt (record insertion) + * the inode chunk (allocation, optional init) + * the inobt (record insertion) + * the finobt (optional, record insertion) */ STATIC uint xfs_calc_icreate_resv_alloc( @@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 0); + xfs_calc_inode_chunk_res(mp, _ALLOC) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } STATIC uint @@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp) } STATIC uint -xfs_calc_create_reservation( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return xfs_calc_icreate_reservation(mp); - return __xfs_calc_create_reservation(mp); - -} - -STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { uint res = XFS_DQUOT_LOGRES(mp); - if (xfs_sb_version_hascrc(&mp->m_sb)) - res += xfs_calc_icreate_resv_alloc(mp); - else - res += xfs_calc_create_resv_alloc(mp); - + res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); } @@ -470,7 +457,7 @@ STATIC uint xfs_calc_mkdir_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp); + return xfs_calc_icreate_reservation(mp); } @@ -483,20 +470,24 @@ STATIC uint xfs_calc_symlink_reservation( struct xfs_mount *mp) { - return xfs_calc_create_reservation(mp) + + return xfs_calc_icreate_reservation(mp) + xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN); } /* * In freeing an inode we can modify: * the inode being freed: inode size - * the super block free inode counter: sector size - * the agi hash list and counters: sector size - * the inode btree entry: block size - * the on disk inode before ours in the agi hash list: inode cluster size - * the inode btree: max depth * blocksize - * the allocation btrees: 2 trees * (max depth - 1) * block size + * the super block free inode counter, AGF and AGFL: sector size + * the on disk inode (agi unlinked list removal) + * the inode chunk (invalidated, headers only) + * the inode btree * the finobt (record insertion, removal or modification) + * + * Note that the inode chunk res. includes an allocfree res. for freeing of the + * inode chunk. This is technically extraneous because the inode chunk free is + * deferred (it occurs after a transaction roll). Include the extra reservation + * anyways since we've had reports of ifree transaction overruns due to too many + * agfl fixups during inode chunk frees. 
*/ STATIC uint xfs_calc_ifree_reservation( @@ -504,15 +495,11 @@ xfs_calc_ifree_reservation( { return XFS_DQUOT_LOGRES(mp) + xfs_calc_inode_res(mp, 1) + - xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + - xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + mp->m_ialloc_blks + - mp->m_in_maxlevels, 0) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)) + - xfs_calc_finobt_res(mp, 0, 1); + xfs_calc_inode_chunk_res(mp, _FREE) + + xfs_calc_inobt_res(mp) + + xfs_calc_finobt_res(mp); } /* @@ -842,7 +829,7 @@ xfs_trans_resv_calc( resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT; resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES; - resp->tr_create.tr_logres = xfs_calc_create_reservation(mp); + resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp); resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
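The reworked xfs_calc_*() helpers above feed xfs_trans_resv_calc(), and xfs_log_get_max_trans_res() (made non-static earlier in this section and declared in xfs_shared.h) reports the largest of the resulting reservations. A minimal consumer sketch, assuming a hypothetical helper name xfs_example_check_max_res() and warning text; struct xfs_trans_res, xfs_warn() and XFS_FSB_TO_B() are existing XFS interfaces:

	/*
	 * Sketch only, not part of the patch: recompute the worst-case
	 * transaction reservation and warn if the log could not hold it.
	 */
	STATIC void
	xfs_example_check_max_res(
		struct xfs_mount	*mp)
	{
		struct xfs_trans_res	max_res;

		/* Largest single reservation among all transaction types. */
		xfs_log_get_max_trans_res(mp, &max_res);

		/* A permanent reservation may be regranted tr_logcount times. */
		if ((uint64_t)max_res.tr_logres * max_res.tr_logcount >
		    XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks))
			xfs_warn(mp, "log too small for worst-case reservation");
	}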