diff options
author | Jiri Kosina <jkosina@suse.cz> | 2020-09-01 15:19:48 +0300 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2020-09-01 15:19:48 +0300 |
commit | ead5d1f4d877e92c051e1a1ade623d0d30e71619 (patch) | |
tree | cb9db5698a546e7b96f7d5bef5ce544629dd37a2 /fs/xfs/libxfs | |
parent | f53fa968a7344970b8f8a5707c39cdcf17a6f367 (diff) | |
parent | b51594df17d0ce80b9f9f35394a1f42d7ac94472 (diff) | |
download | linux-ead5d1f4d877e92c051e1a1ade623d0d30e71619.tar.xz |
Merge branch 'master' into for-next
Sync with Linus' branch in order to be able to apply fixups
of more recent patches.
Diffstat (limited to 'fs/xfs/libxfs')
63 files changed, 3677 insertions, 2306 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 08d6beb54f8c..8cf73fe4338e 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -231,7 +231,7 @@ xfs_sbblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; xfs_sb_to_disk(dsb, &mp->m_sb); dsb->sb_inprogress = 1; @@ -243,7 +243,7 @@ xfs_agfblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_agf *agf = bp->b_addr; xfs_extlen_t tmpsize; agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); @@ -301,7 +301,7 @@ xfs_agflblock_init( uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); } - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); + agfl_bno = xfs_buf_to_agfl_bno(bp); for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); } @@ -312,7 +312,7 @@ xfs_agiblock_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + struct xfs_agi *agi = bp->b_addr; int bucket; agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); @@ -502,7 +502,7 @@ xfs_ag_extend_space( if (error) return error; - agi = XFS_BUF_TO_AGI(bp); + agi = bp->b_addr; be32_add_cpu(&agi->agi_length, len); ASSERT(id->agno == mp->m_sb.sb_agcount - 1 || be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); @@ -515,7 +515,7 @@ xfs_ag_extend_space( if (error) return error; - agf = XFS_BUF_TO_AGF(bp); + agf = bp->b_addr; be32_add_cpu(&agf->agf_length, len); ASSERT(agf->agf_length == agi->agi_length); xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); @@ -563,17 +563,18 @@ xfs_ag_get_geometry( error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp); if (error) goto out_agi; - pag = xfs_perag_get(mp, agno); + + pag = agi_bp->b_pag; /* Fill out form. 
*/ memset(ageo, 0, sizeof(*ageo)); ageo->ag_number = agno; - agi = XFS_BUF_TO_AGI(agi_bp); + agi = agi_bp->b_addr; ageo->ag_icount = be32_to_cpu(agi->agi_count); ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); - agf = XFS_BUF_TO_AGF(agf_bp); + agf = agf_bp->b_addr; ageo->ag_length = be32_to_cpu(agf->agf_length); freeblks = pag->pagf_freeblks + pag->pagf_flcount + @@ -583,7 +584,6 @@ xfs_ag_get_geometry( xfs_ag_geom_health(pag, ageo); /* Release resources. */ - xfs_perag_put(pag); xfs_buf_relse(agf_bp); out_agi: xfs_buf_relse(agi_bp); diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index c0352edc8e41..8a8eb4bc48bb 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> @@ -37,16 +37,4 @@ xfs_ag_resv_rmapbt_alloc( xfs_perag_put(pag); } -static inline void -xfs_ag_resv_rmapbt_free( - struct xfs_mount *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, agno); - xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); - xfs_perag_put(pag); -} - #endif /* __XFS_AG_RESV_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index d8053bc96c4d..852b536551b5 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -151,7 +151,7 @@ xfs_alloc_lookup_eq( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -171,7 +171,7 @@ xfs_alloc_lookup_ge( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -190,7 +190,7 @@ 
xfs_alloc_lookup_le( cur->bc_rec.a.ar_startblock = bno; cur->bc_rec.a.ar_blockcount = len; error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); - cur->bc_private.a.priv.abt.active = (*stat == 1); + cur->bc_ag.abt.active = (*stat == 1); return error; } @@ -198,7 +198,7 @@ static inline bool xfs_alloc_cur_active( struct xfs_btree_cur *cur) { - return cur && cur->bc_private.a.priv.abt.active; + return cur && cur->bc_ag.abt.active; } /* @@ -230,7 +230,7 @@ xfs_alloc_get_rec( int *stat) /* output: success/failure */ { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; @@ -589,6 +589,7 @@ xfs_agfl_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); + __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp); int i; /* @@ -614,8 +615,8 @@ xfs_agfl_verify( return __this_address; for (i = 0; i < xfs_agfl_size(mp); i++) { - if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && - be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) + if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK && + be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks) return __this_address; } @@ -709,19 +710,18 @@ xfs_alloc_read_agfl( STATIC int xfs_alloc_update_counters( struct xfs_trans *tp, - struct xfs_perag *pag, struct xfs_buf *agbp, long len) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_agf *agf = agbp->b_addr; - pag->pagf_freeblks += len; + agbp->b_pag->pagf_freeblks += len; be32_add_cpu(&agf->agf_freeblks, len); xfs_trans_agblocks_delta(tp, len); if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) { - xfs_buf_corruption_error(agbp); + xfs_buf_mark_corrupt(agbp); return -EFSCORRUPTED; } @@ -907,7 +907,7 @@ xfs_alloc_cur_check( deactivate = true; out: if (deactivate) - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.abt.active = false; trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff, *new); return 0; @@ 
-922,13 +922,13 @@ xfs_alloc_cur_finish( struct xfs_alloc_arg *args, struct xfs_alloc_cur *acur) { + struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; int error; ASSERT(acur->cnt && acur->bnolt); ASSERT(acur->bno >= acur->rec_bno); ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len); - ASSERT(acur->rec_bno + acur->rec_len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno, acur->rec_len, acur->bno, acur->len, 0); @@ -1026,6 +1026,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t *flenp, /* result length */ int *stat) /* status: 0-freelist, 1-normal/none */ { + struct xfs_agf *agf = args->agbp->b_addr; int error = 0; xfs_agblock_t fbno = NULLAGBLOCK; xfs_extlen_t flen = 0; @@ -1054,8 +1055,7 @@ xfs_alloc_ag_vextent_small( if (args->minlen != 1 || args->alignment != 1 || args->resv == XFS_AG_RESV_AGFL || - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= - args->minleft)) + be32_to_cpu(agf->agf_flcount) <= args->minleft) goto out; error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); @@ -1079,9 +1079,7 @@ xfs_alloc_ag_vextent_small( } *fbnop = args->agbno = fbno; *flenp = args->len = 1; - if (XFS_IS_CORRUPT(args->mp, - fbno >= be32_to_cpu( - XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto error; } @@ -1176,8 +1174,7 @@ xfs_alloc_ag_vextent( } if (!args->wasfromfl) { - error = xfs_alloc_update_counters(args->tp, args->pag, - args->agbp, + error = xfs_alloc_update_counters(args->tp, args->agbp, -((long)(args->len))); if (error) return error; @@ -1203,6 +1200,7 @@ STATIC int /* error */ xfs_alloc_ag_vextent_exact( xfs_alloc_arg_t *args) /* allocation argument structure */ { + struct xfs_agf __maybe_unused *agf = args->agbp->b_addr; xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ int error; @@ -1281,8 +1279,7 @@ xfs_alloc_ag_vextent_exact( */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); - ASSERT(args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, args->len, XFSA_FIXUP_BNO_OK); if (error) { @@ -1353,7 +1350,7 @@ xfs_alloc_walk_iter( if (error) return error; if (i == 0) - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.abt.active = false; if (count > 0) count--; @@ -1468,7 +1465,7 @@ xfs_alloc_ag_vextent_locality( if (error) return error; if (i) { - acur->cnt->bc_private.a.priv.abt.active = true; + acur->cnt->bc_ag.abt.active = true; fbcur = acur->cnt; fbinc = false; } @@ -1515,7 +1512,7 @@ xfs_alloc_ag_vextent_lastblock( * maxlen, go to the start of this block, and skip all those smaller * than minlen. 
*/ - if (len || args->alignment > 1) { + if (*len || args->alignment > 1) { acur->cnt->bc_ptrs[0] = 1; do { error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); @@ -1661,6 +1658,7 @@ STATIC int /* error */ xfs_alloc_ag_vextent_size( xfs_alloc_arg_t *args) /* allocation argument structure */ { + struct xfs_agf *agf = args->agbp->b_addr; xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ int error; /* error result */ @@ -1851,8 +1849,7 @@ restart: args->agbno = rbno; if (XFS_IS_CORRUPT(args->mp, args->agbno + args->len > - be32_to_cpu( - XFS_BUF_TO_AGF(args->agbp)->agf_length))) { + be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto error0; } @@ -1888,7 +1885,6 @@ xfs_free_ag_extent( enum xfs_ag_resv_type type) { struct xfs_mount *mp; - struct xfs_perag *pag; struct xfs_btree_cur *bno_cur; struct xfs_btree_cur *cnt_cur; xfs_agblock_t gtbno; /* start of right neighbor */ @@ -2168,10 +2164,8 @@ xfs_free_ag_extent( /* * Update the freespace totals in the ag and superblock. 
*/ - pag = xfs_perag_get(mp, agno); - error = xfs_alloc_update_counters(tp, pag, agbp, len); - xfs_ag_resv_free_extent(pag, type, tp, len); - xfs_perag_put(pag); + error = xfs_alloc_update_counters(tp, agbp, len); + xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len); if (error) goto error0; @@ -2424,7 +2418,7 @@ xfs_agfl_reset( struct xfs_perag *pag) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_agf *agf = agbp->b_addr; ASSERT(pag->pagf_agflreset); trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); @@ -2468,7 +2462,8 @@ xfs_defer_agfl_block( ASSERT(xfs_bmap_free_item_zone != NULL); ASSERT(oinfo != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); + new = kmem_cache_alloc(xfs_bmap_free_item_zone, + GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; new->xefi_oinfo = *oinfo; @@ -2655,7 +2650,7 @@ xfs_alloc_get_freelist( xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { - xfs_agf_t *agf; /* a.g. freespace structure */ + struct xfs_agf *agf = agbp->b_addr; xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; @@ -2667,7 +2662,6 @@ xfs_alloc_get_freelist( /* * Freelist is empty, give up. */ - agf = XFS_BUF_TO_AGF(agbp); if (!agf->agf_flcount) { *bnop = NULLAGBLOCK; return 0; @@ -2684,14 +2678,14 @@ xfs_alloc_get_freelist( /* * Get the block number and update the data structures. 
*/ - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; - pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + pag = agbp->b_pag; ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); @@ -2703,7 +2697,6 @@ xfs_alloc_get_freelist( pag->pagf_btreeblks++; logflags |= XFS_AGF_BTREEBLKS; } - xfs_perag_put(pag); xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; @@ -2745,7 +2738,7 @@ xfs_alloc_log_agf( sizeof(xfs_agf_t) }; - trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); + trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); @@ -2783,18 +2776,15 @@ xfs_alloc_put_freelist( xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { - xfs_agf_t *agf; /* a.g. 
freespace structure */ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf = agbp->b_addr; __be32 *blockp;/* pointer to array entry */ int error; int logflags; - xfs_mount_t *mp; /* mount structure */ xfs_perag_t *pag; /* per allocation group data */ __be32 *agfl_bno; int startoff; - agf = XFS_BUF_TO_AGF(agbp); - mp = tp->t_mountp; - if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), &agflbp))) return error; @@ -2802,7 +2792,7 @@ xfs_alloc_put_freelist( if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; - pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + pag = agbp->b_pag; ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); @@ -2814,13 +2804,12 @@ xfs_alloc_put_freelist( pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } - xfs_perag_put(pag); xfs_alloc_log_agf(tp, agbp, logflags); ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); startoff = (char *)blockp - (char *)agflbp->b_addr; @@ -2838,13 +2827,12 @@ xfs_agf_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + struct xfs_agf *agf = bp->b_addr; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (!xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) + if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn))) return __this_address; } @@ -2858,6 +2846,13 @@ xfs_agf_verify( be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return __this_address; + if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) + return __this_address; + + if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || + be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) + return 
__this_address; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || @@ -2869,6 +2864,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) return __this_address; + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && + be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) + return __this_address; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2883,6 +2882,11 @@ xfs_agf_verify( return __this_address; if (xfs_sb_version_hasreflink(&mp->m_sb) && + be32_to_cpu(agf->agf_refcount_blocks) > + be32_to_cpu(agf->agf_length)) + return __this_address; + + if (xfs_sb_version_hasreflink(&mp->m_sb) && (be32_to_cpu(agf->agf_refcount_level) < 1 || be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) return __this_address; @@ -2914,6 +2918,7 @@ xfs_agf_write_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_agf *agf = bp->b_addr; xfs_failaddr_t fa; fa = xfs_agf_verify(bp); @@ -2926,7 +2931,7 @@ xfs_agf_write_verify( return; if (bip) - XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); + agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); } @@ -2994,8 +2999,8 @@ xfs_alloc_read_agf( return error; ASSERT(!(*bpp)->b_error); - agf = XFS_BUF_TO_AGF(*bpp); - pag = xfs_perag_get(mp, agno); + agf = (*bpp)->b_addr; + pag = (*bpp)->b_pag; if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); @@ -3023,7 +3028,6 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif - xfs_perag_put(pag); return 0; } @@ -3275,6 +3279,7 @@ __xfs_free_extent( struct xfs_buf *agbp; xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); xfs_agblock_t 
agbno = XFS_FSB_TO_AGBNO(mp, bno); + struct xfs_agf *agf; int error; unsigned int busy_flags = 0; @@ -3288,6 +3293,7 @@ __xfs_free_extent( error = xfs_free_extent_fix_freelist(tp, agno, &agbp); if (error) return error; + agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { error = -EFSCORRUPTED; @@ -3295,9 +3301,7 @@ __xfs_free_extent( } /* validate the extent size is legal now we have the agf locked */ - if (XFS_IS_CORRUPT(mp, - agbno + len > - be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) { + if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) { error = -EFSCORRUPTED; goto err; } @@ -3408,7 +3412,7 @@ xfs_agfl_walk( unsigned int i; int error; - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + agfl_bno = xfs_buf_to_agfl_bno(agflbp); i = be32_to_cpu(agf->agf_flfirst); /* Nothing to walk in an empty AGFL. */ diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7380fbe4a3ff..6c22b12176b8 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * All Rights Reserved. 
@@ -236,4 +236,13 @@ typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno, int xfs_agfl_walk(struct xfs_mount *mp, struct xfs_agf *agf, struct xfs_buf *agflbp, xfs_agfl_walk_fn walk_fn, void *priv); +static inline __be32 * +xfs_buf_to_agfl_bno( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_mount->m_sb)) + return bp->b_addr + sizeof(struct xfs_agfl); + return bp->b_addr; +} + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 279694d73e4e..8e01231b308e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -12,6 +12,7 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" @@ -25,7 +26,7 @@ xfs_allocbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_ag.agbp, cur->bc_ag.agno, cur->bc_btnum); } @@ -35,18 +36,16 @@ xfs_allocbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; int btnum = cur->bc_btnum; - struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); + struct xfs_perag *pag = agbp->b_pag; ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); pag->pagf_levels[btnum] += inc; - xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -62,7 +61,7 @@ xfs_allocbt_alloc_block( xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. 
*/ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; @@ -72,7 +71,7 @@ xfs_allocbt_alloc_block( return 0; } - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false); xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); @@ -86,8 +85,8 @@ xfs_allocbt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_agblock_t bno; int error; @@ -113,8 +112,7 @@ xfs_allocbt_update_lastrec( int ptr, int reason) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; struct xfs_perag *pag; __be32 len; int numrecs; @@ -159,10 +157,9 @@ xfs_allocbt_update_lastrec( } agf->agf_longest = len; - pag = xfs_perag_get(cur->bc_mp, seqno); + pag = cur->bc_ag.agbp->b_pag; pag->pagf_longest = be32_to_cpu(len); - xfs_perag_put(pag); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST); } STATIC int @@ -226,9 +223,9 @@ xfs_allocbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -471,23 +468,19 @@ static const struct xfs_btree_ops xfs_cntbt_ops = { .recs_inorder = xfs_cntbt_recs_inorder, }; -/* - * Allocate a new allocation btree cursor. 
- */ -struct xfs_btree_cur * /* new alloc btree cursor */ -xfs_allocbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agf structure */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_btnum_t btnum) /* btree identifier */ +/* Allocate most of a new allocation btree cursor. */ +STATIC struct xfs_btree_cur * +xfs_allocbt_init_common( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_btnum_t btnum) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; @@ -495,19 +488,16 @@ xfs_allocbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; if (btnum == XFS_BTNUM_CNT) { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); cur->bc_ops = &xfs_cntbt_ops; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; } else { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); cur->bc_ops = &xfs_bnobt_ops; - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); } - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - cur->bc_private.a.priv.abt.active = false; + cur->bc_ag.agno = agno; + cur->bc_ag.abt.active = false; if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; @@ -516,6 +506,73 @@ xfs_allocbt_init_cursor( } /* + * Allocate a new allocation btree cursor. 
+ */ +struct xfs_btree_cur * /* new alloc btree cursor */ +xfs_allocbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agf structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* btree identifier */ +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + + cur = xfs_allocbt_init_common(mp, tp, agno, btnum); + if (btnum == XFS_BTNUM_CNT) + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + else + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + + cur->bc_ag.agbp = agbp; + + return cur; +} + +/* Create a free space btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_allocbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + + cur = xfs_allocbt_init_common(mp, NULL, agno, btnum); + xfs_btree_stage_afakeroot(cur, afake); + return cur; +} + +/* + * Install a new free space btree root. Caller is responsible for invalidating + * and freeing the old btree blocks. + */ +void +xfs_allocbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); + agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); + + if (cur->bc_btnum == XFS_BTNUM_BNO) { + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_bnobt_ops); + } else { + cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE; + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_cntbt_ops); + } +} + +/* * Calculate number of records in an alloc btree block. 
*/ int diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index c9305ebb69f6..a5b998e950fe 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* * Btree block header size depends on a superblock flag. @@ -48,8 +49,14 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + xfs_btnum_t btnum); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); +void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index e6149720ce02..2e055c079f39 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -46,6 +46,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); +STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); /* * Internal routines when attribute list is more than one block. 
@@ -53,43 +54,18 @@ STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_node_get(xfs_da_args_t *args); STATIC int xfs_attr_node_addname(xfs_da_args_t *args); STATIC int xfs_attr_node_removename(xfs_da_args_t *args); +STATIC int xfs_attr_node_hasname(xfs_da_args_t *args, + struct xfs_da_state **state); STATIC int xfs_attr_fillstate(xfs_da_state_t *state); STATIC int xfs_attr_refillstate(xfs_da_state_t *state); - -STATIC int -xfs_attr_args_init( - struct xfs_da_args *args, - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - int flags) -{ - - if (!name) - return -EINVAL; - - memset(args, 0, sizeof(*args)); - args->geo = dp->i_mount->m_attr_geo; - args->whichfork = XFS_ATTR_FORK; - args->dp = dp; - args->flags = flags; - args->name = name; - args->namelen = namelen; - if (args->namelen >= MAXNAMELEN) - return -EFAULT; /* match IRIX behaviour */ - - args->hashval = xfs_da_hashname(args->name, args->namelen); - return 0; -} - int xfs_inode_hasattr( struct xfs_inode *ip) { if (!XFS_IFORK_Q(ip) || - (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_anextents == 0)) + (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS && + ip->i_afp->if_nextents == 0)) return 0; return 1; } @@ -104,85 +80,60 @@ xfs_inode_hasattr( */ int xfs_attr_get_ilocked( - struct xfs_inode *ip, struct xfs_da_args *args) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + ASSERT(xfs_isilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); - if (!xfs_inode_hasattr(ip)) + if (!xfs_inode_hasattr(args->dp)) return -ENOATTR; - else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + + if (args->dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) return xfs_attr_shortform_getvalue(args); - else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) + if (xfs_bmap_one_block(args->dp, XFS_ATTR_FORK)) return xfs_attr_leaf_get(args); - else - return xfs_attr_node_get(args); + return xfs_attr_node_get(args); } /* * Retrieve an extended attribute by name, and its 
value if requested. * - * If ATTR_KERNOVAL is set in @flags, then the caller does not want the value, - * just an indication whether the attribute exists and the size of the value if - * it exists. The size is returned in @valuelenp, + * If args->valuelen is zero, then the caller does not want the value, just an + * indication whether the attribute exists and the size of the value if it + * exists. The size is returned in args.valuelen. * - * If the attribute is found, but exceeds the size limit set by the caller in - * @valuelenp, return -ERANGE with the size of the attribute that was found in - * @valuelenp. + * If args->value is NULL but args->valuelen is non-zero, allocate the buffer + * for the value after existence of the attribute has been determined. The + * caller always has to free args->value if it is set, no matter if this + * function was successful or not. * - * If ATTR_ALLOC is set in @flags, allocate the buffer for the value after - * existence of the attribute has been determined. On success, return that - * buffer to the caller and leave them to free it. On failure, free any - * allocated buffer and ensure the buffer pointer returned to the caller is - * null. + * If the attribute is found, but exceeds the size limit set by the caller in + * args->valuelen, return -ERANGE with the size of the attribute that was found + * in args->valuelen. 
*/ int xfs_attr_get( - struct xfs_inode *ip, - const unsigned char *name, - size_t namelen, - unsigned char **value, - int *valuelenp, - int flags) + struct xfs_da_args *args) { - struct xfs_da_args args; uint lock_mode; int error; - ASSERT((flags & (ATTR_ALLOC | ATTR_KERNOVAL)) || *value); - - XFS_STATS_INC(ip->i_mount, xs_attr_get); + XFS_STATS_INC(args->dp->i_mount, xs_attr_get); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(args->dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, ip, name, namelen, flags); - if (error) - return error; + args->geo = args->dp->i_mount->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->hashval = xfs_da_hashname(args->name, args->namelen); /* Entirely possible to look up a name which doesn't exist */ - args.op_flags = XFS_DA_OP_OKNOENT; - if (flags & ATTR_ALLOC) - args.op_flags |= XFS_DA_OP_ALLOCVAL; - else - args.value = *value; - args.valuelen = *valuelenp; + args->op_flags = XFS_DA_OP_OKNOENT; - lock_mode = xfs_ilock_attr_map_shared(ip); - error = xfs_attr_get_ilocked(ip, &args); - xfs_iunlock(ip, lock_mode); - *valuelenp = args.valuelen; + lock_mode = xfs_ilock_attr_map_shared(args->dp); + error = xfs_attr_get_ilocked(args); + xfs_iunlock(args->dp, lock_mode); - /* on error, we have to clean up allocated value buffers */ - if (error) { - if (flags & ATTR_ALLOC) { - kmem_free(args.value); - *value = NULL; - } - return error; - } - *value = args.value; - return 0; + return error; } /* @@ -227,8 +178,13 @@ xfs_attr_try_sf_addname( struct xfs_da_args *args) { - struct xfs_mount *mp = dp->i_mount; - int error, error2; + int error; + + /* + * Build initial attribute list (if required). + */ + if (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS) + xfs_attr_shortform_create(args); error = xfs_attr_shortform_addname(args); if (error == -ENOSPC) @@ -238,15 +194,73 @@ xfs_attr_try_sf_addname( * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). 
*/ - if (!error && (args->flags & ATTR_KERNOTIME) == 0) + if (!error && !(args->op_flags & XFS_DA_OP_NOTIME)) xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); - if (mp->m_flags & XFS_MOUNT_WSYNC) + if (dp->i_mount->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args->trans); - error2 = xfs_trans_commit(args->trans); - args->trans = NULL; - return error ? error : error2; + return error; +} + +/* + * Check to see if the attr should be upgraded from non-existent or shortform to + * single-leaf-block attribute list. + */ +static inline bool +xfs_attr_is_shortform( + struct xfs_inode *ip) +{ + return ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL || + (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS && + ip->i_afp->if_nextents == 0); +} + +/* + * Attempts to set an attr in shortform, or converts short form to leaf form if + * there is not enough room. If the attr is set, the transaction is committed + * and set to NULL. + */ +STATIC int +xfs_attr_set_shortform( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) +{ + struct xfs_inode *dp = args->dp; + int error, error2 = 0; + + /* + * Try to add the attr to the attribute list in the inode. + */ + error = xfs_attr_try_sf_addname(dp, args); + if (error != -ENOSPC) { + error2 = xfs_trans_commit(args->trans); + args->trans = NULL; + return error ? error : error2; + } + /* + * It won't fit in the shortform, transform to a leaf block. GROT: + * another possible req'mt for a double-split btree op. + */ + error = xfs_attr_shortform_to_leaf(args, leaf_bp); + if (error) + return error; + + /* + * Prevent the leaf buffer from being unlocked so that a concurrent AIL + * push cannot grab the half-baked leaf buffer and run into problems + * with the write verifier. Once we're done rolling the transaction we + * can release the hold and add the attr to the leaf. 
+ */ + xfs_trans_bhold(args->trans, *leaf_bp); + error = xfs_defer_finish(&args->trans); + xfs_trans_bhold_release(args->trans, *leaf_bp); + if (error) { + xfs_trans_brelse(args->trans, *leaf_bp); + return error; + } + + return 0; } /* @@ -258,61 +272,94 @@ xfs_attr_set_args( { struct xfs_inode *dp = args->dp; struct xfs_buf *leaf_bp = NULL; - int error; + int error = 0; /* - * If the attribute list is non-existent or a shortform list, - * upgrade it to a single-leaf-block attribute list. + * If the attribute list is already in leaf format, jump straight to + * leaf handling. Otherwise, try to add the attribute to the shortform + * list; if there's no room then convert the list to leaf format and try + * again. */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (xfs_attr_is_shortform(dp)) { /* - * Build initial attribute list (if required). + * If the attr was successfully set in shortform, the + * transaction is committed and set to NULL. Otherwise, is it + * converted from shortform to leaf, and the transaction is + * retained. */ - if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) - xfs_attr_shortform_create(args); + error = xfs_attr_set_shortform(args, &leaf_bp); + if (error || !args->trans) + return error; + } - /* - * Try to add the attr to the attribute list in the inode. - */ - error = xfs_attr_try_sf_addname(dp, args); + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + error = xfs_attr_leaf_addname(args); if (error != -ENOSPC) return error; /* - * It won't fit in the shortform, transform to a leaf block. - * GROT: another possible req'mt for a double-split btree op. + * Promote the attribute list to the Btree format. 
*/ - error = xfs_attr_shortform_to_leaf(args, &leaf_bp); + error = xfs_attr3_leaf_to_node(args); if (error) return error; /* - * Prevent the leaf buffer from being unlocked so that a - * concurrent AIL push cannot grab the half-baked leaf - * buffer and run into problems with the write verifier. - * Once we're done rolling the transaction we can release - * the hold and add the attr to the leaf. + * Finish any deferred work items and roll the transaction once + * more. The goal here is to call node_addname with the inode + * and transaction in the same state (inode locked and joined, + * transaction clean) no matter how we got to this step. */ - xfs_trans_bhold(args->trans, leaf_bp); error = xfs_defer_finish(&args->trans); - xfs_trans_bhold_release(args->trans, leaf_bp); - if (error) { - xfs_trans_brelse(args->trans, leaf_bp); + if (error) + return error; + + /* + * Commit the current trans (including the inode) and + * start a new one. + */ + error = xfs_trans_roll_inode(&args->trans, dp); + if (error) return error; - } } - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) - error = xfs_attr_leaf_addname(args); - else - error = xfs_attr_node_addname(args); + error = xfs_attr_node_addname(args); return error; } /* + * Return EEXIST if attr is found, or ENOATTR if not + */ +int +xfs_has_attr( + struct xfs_da_args *args) +{ + struct xfs_inode *dp = args->dp; + struct xfs_buf *bp = NULL; + int error; + + if (!xfs_inode_hasattr(dp)) + return -ENOATTR; + + if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) { + ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); + return xfs_attr_sf_findname(args, NULL, NULL); + } + + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { + error = xfs_attr_leaf_hasname(args, &bp); + + if (bp) + xfs_trans_brelse(args->trans, bp); + + return error; + } + + return xfs_attr_node_hasname(args, NULL); +} + +/* * Remove the attribute specified in @args. 
*/ int @@ -324,7 +371,7 @@ xfs_attr_remove_args( if (!xfs_inode_hasattr(dp)) { error = -ENOATTR; - } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + } else if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL) { ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); error = xfs_attr_shortform_remove(args); } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { @@ -336,188 +383,140 @@ xfs_attr_remove_args( return error; } +/* + * Note: If args->value is NULL the attribute will be removed, just like the + * Linux ->setattr API. + */ int xfs_attr_set( - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - unsigned char *value, - int valuelen, - int flags) + struct xfs_da_args *args) { + struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; struct xfs_trans_res tres; - int rsvd = (flags & ATTR_ROOT) != 0; + bool rsvd = (args->attr_filter & XFS_ATTR_ROOT); int error, local; - - XFS_STATS_INC(mp, xs_attr_set); + unsigned int total; if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, dp, name, namelen, flags); - if (error) - return error; - - args.value = value; - args.valuelen = valuelen; - args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - args.total = xfs_attr_calc_size(&args, &local); - error = xfs_qm_dqattach(dp); if (error) return error; - /* - * If the inode doesn't have an attribute fork, add one. 
- * (inode must not be locked when we call this routine) - */ - if (XFS_IFORK_Q(dp) == 0) { - int sf_size = sizeof(xfs_attr_sf_hdr_t) + - XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); - - error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); - if (error) - return error; - } - - tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + - M_RES(mp)->tr_attrsetrt.tr_logres * args.total; - tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; - tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; - - /* - * Root fork attributes can use reserved data blocks for this - * operation if necessary - */ - error = xfs_trans_alloc(mp, &tres, args.total, 0, - rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); - if (error) - return error; - - xfs_ilock(dp, XFS_ILOCK_EXCL); - error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, - rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); - if (error) - goto out_trans_cancel; - - xfs_trans_ijoin(args.trans, dp, 0); - error = xfs_attr_set_args(&args); - if (error) - goto out_trans_cancel; - if (!args.trans) { - /* shortform attribute has already been committed */ - goto out_unlock; - } - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); - - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + args->geo = mp->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + args->hashval = xfs_da_hashname(args->name, args->namelen); /* - * Commit the last in the sequence of transactions. + * We have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail as well. 
*/ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans); -out_unlock: - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; - -out_trans_cancel: - if (args.trans) - xfs_trans_cancel(args.trans); - goto out_unlock; -} + args->op_flags = XFS_DA_OP_OKNOENT; -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -int -xfs_attr_remove( - struct xfs_inode *dp, - const unsigned char *name, - size_t namelen, - int flags) -{ - struct xfs_mount *mp = dp->i_mount; - struct xfs_da_args args; - int error; + if (args->value) { + XFS_STATS_INC(mp, xs_attr_set); - XFS_STATS_INC(mp, xs_attr_remove); + args->op_flags |= XFS_DA_OP_ADDNAME; + args->total = xfs_attr_calc_size(args, &local); - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return -EIO; + /* + * If the inode doesn't have an attribute fork, add one. + * (inode must not be locked when we call this routine) + */ + if (XFS_IFORK_Q(dp) == 0) { + int sf_size = sizeof(struct xfs_attr_sf_hdr) + + XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, + args->valuelen); - error = xfs_attr_args_init(&args, dp, name, namelen, flags); - if (error) - return error; + error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + if (error) + return error; + } - /* - * we have no control over the attribute names that userspace passes us - * to remove, so we have to allow the name lookup prior to attribute - * removal to fail. 
- */ - args.op_flags = XFS_DA_OP_OKNOENT; + tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + + M_RES(mp)->tr_attrsetrt.tr_logres * + args->total; + tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; + tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; + total = args->total; + } else { + XFS_STATS_INC(mp, xs_attr_remove); - error = xfs_qm_dqattach(dp); - if (error) - return error; + tres = M_RES(mp)->tr_attrrm; + total = XFS_ATTRRM_SPACE_RES(mp); + } /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm, - XFS_ATTRRM_SPACE_RES(mp), 0, - (flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0, - &args.trans); + error = xfs_trans_alloc(mp, &tres, total, 0, + rsvd ? XFS_TRANS_RESERVE : 0, &args->trans); if (error) return error; xfs_ilock(dp, XFS_ILOCK_EXCL); - /* - * No need to make quota reservations here. We expect to release some - * blocks not allocate in the common case. - */ - xfs_trans_ijoin(args.trans, dp, 0); + xfs_trans_ijoin(args->trans, dp, 0); + if (args->value) { + unsigned int quota_flags = XFS_QMOPT_RES_REGBLKS; + + if (rsvd) + quota_flags |= XFS_QMOPT_FORCE_RES; + error = xfs_trans_reserve_quota_nblks(args->trans, dp, + args->total, 0, quota_flags); + if (error) + goto out_trans_cancel; - error = xfs_attr_remove_args(&args); - if (error) - goto out; + error = xfs_has_attr(args); + if (error == -EEXIST && (args->attr_flags & XATTR_CREATE)) + goto out_trans_cancel; + if (error == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) + goto out_trans_cancel; + if (error != -ENOATTR && error != -EEXIST) + goto out_trans_cancel; + + error = xfs_attr_set_args(args); + if (error) + goto out_trans_cancel; + /* shortform attribute has already been committed */ + if (!args->trans) + goto out_unlock; + } else { + error = xfs_has_attr(args); + if (error != -EEXIST) + goto out_trans_cancel; + + error = xfs_attr_remove_args(args); + if (error) + goto out_trans_cancel; + } /* * If this is a synchronous mount, 
make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(args.trans); + xfs_trans_set_sync(args->trans); - if ((flags & ATTR_KERNOTIME) == 0) - xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + if (!(args->op_flags & XFS_DA_OP_NOTIME)) + xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ - xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); - error = xfs_trans_commit(args.trans); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + error = xfs_trans_commit(args->trans); +out_unlock: xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; -out: - if (args.trans) - xfs_trans_cancel(args.trans); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - return error; +out_trans_cancel: + if (args->trans) + xfs_trans_cancel(args->trans); + goto out_unlock; } /*======================================================================== @@ -536,10 +535,10 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) trace_xfs_attr_sf_addname(args); retval = xfs_attr_shortform_lookup(args); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) return retval; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) return retval; retval = xfs_attr_shortform_remove(args); if (retval) @@ -549,7 +548,7 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * that the leaf format add routine won't trip over the attr * not being around. 
*/ - args->flags &= ~ATTR_REPLACE; + args->attr_flags &= ~XATTR_REPLACE; } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || @@ -572,54 +571,65 @@ xfs_attr_shortform_addname(xfs_da_args_t *args) * External routines when attribute list is one block *========================================================================*/ +/* Store info about a remote block */ +STATIC void +xfs_attr_save_rmt_blk( + struct xfs_da_args *args) +{ + args->blkno2 = args->blkno; + args->index2 = args->index; + args->rmtblkno2 = args->rmtblkno; + args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; +} + +/* Set stored info about a remote block */ +STATIC void +xfs_attr_restore_rmt_blk( + struct xfs_da_args *args) +{ + args->blkno = args->blkno2; + args->index = args->index2; + args->rmtblkno = args->rmtblkno2; + args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; +} + /* - * Add a name to the leaf attribute list structure + * Tries to add an attribute to an inode in leaf form * - * This leaf block cannot have a "remote" value, we only call this routine - * if bmap_one_block() says there is only one block (ie: no remote blks). + * This function is meant to execute as part of a delayed operation and leaves + * the transaction handling to the caller. On success the attribute is added + * and the inode and transaction are left dirty. If there is not enough space, + * the attr data is converted to node format and -ENOSPC is returned. Caller is + * responsible for handling the dirty inode and transaction or adding the attr + * in node format. */ STATIC int -xfs_attr_leaf_addname( - struct xfs_da_args *args) +xfs_attr_leaf_try_add( + struct xfs_da_args *args, + struct xfs_buf *bp) { - struct xfs_inode *dp; - struct xfs_buf *bp; - int retval, error, forkoff; - - trace_xfs_attr_leaf_addname(args); - - /* - * Read the (only) block in the attribute list in. 
- */ - dp = args->dp; - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); - if (error) - return error; + int retval; /* * Look up the given attribute in the leaf block. Figure out if * the given flags produce an error or call for an atomic rename. */ - retval = xfs_attr3_leaf_lookup_int(bp, args); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { - xfs_trans_brelse(args->trans, bp); + retval = xfs_attr_leaf_hasname(args, &bp); + if (retval != -ENOATTR && retval != -EEXIST) return retval; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) { /* pure create op */ - xfs_trans_brelse(args->trans, bp); - return retval; - } + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) + goto out_brelse; + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) + goto out_brelse; trace_xfs_attr_leaf_replace(args); /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ - args->blkno2 = args->blkno; /* set 2nd entry info*/ - args->index2 = args->index; - args->rmtblkno2 = args->rmtblkno; - args->rmtblkcnt2 = args->rmtblkcnt; - args->rmtvaluelen2 = args->rmtvaluelen; + xfs_attr_save_rmt_blk(args); /* * clear the remote attr state now that it is saved so that the @@ -632,37 +642,35 @@ xfs_attr_leaf_addname( } /* - * Add the attribute to the leaf block, transitioning to a Btree - * if required. + * Add the attribute to the leaf block */ - retval = xfs_attr3_leaf_add(bp, args); - if (retval == -ENOSPC) { - /* - * Promote the attribute list to the Btree format, then - * Commit that transaction so that the node_addname() call - * can manage its own transactions. - */ - error = xfs_attr3_leaf_to_node(args); - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; + return xfs_attr3_leaf_add(bp, args); - /* - * Commit the current trans (including the inode) and start - * a new one. 
- */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - return error; +out_brelse: + xfs_trans_brelse(args->trans, bp); + return retval; +} - /* - * Fob the whole rest of the problem off on the Btree code. - */ - error = xfs_attr_node_addname(args); + +/* + * Add a name to the leaf attribute list structure + * + * This leaf block cannot have a "remote" value, we only call this routine + * if bmap_one_block() says there is only one block (ie: no remote blks). + */ +STATIC int +xfs_attr_leaf_addname( + struct xfs_da_args *args) +{ + int error, forkoff; + struct xfs_buf *bp = NULL; + struct xfs_inode *dp = args->dp; + + trace_xfs_attr_leaf_addname(args); + + error = xfs_attr_leaf_try_add(args, bp); + if (error) return error; - } /* * Commit the transaction that added the attr name so that @@ -684,71 +692,92 @@ xfs_attr_leaf_addname( return error; } - /* - * If this is an atomic rename operation, we must "flip" the - * incomplete flags on the "new" and "old" attribute/value pairs - * so that one disappears and one appears atomically. Then we - * must remove the "old" attribute/value pair. - */ - if (args->op_flags & XFS_DA_OP_RENAME) { + if (!(args->op_flags & XFS_DA_OP_RENAME)) { /* - * In a separate transaction, set the incomplete flag on the - * "old" attr and clear the incomplete flag on the "new" attr. + * Added a "remote" value, just clear the incomplete flag. */ - error = xfs_attr3_leaf_flipflags(args); + if (args->rmtblkno > 0) + error = xfs_attr3_leaf_clearflag(args); + + return error; + } + + /* + * If this is an atomic rename operation, we must "flip" the incomplete + * flags on the "new" and "old" attribute/value pairs so that one + * disappears and one appears atomically. Then we must remove the "old" + * attribute/value pair. + * + * In a separate transaction, set the incomplete flag on the "old" attr + * and clear the incomplete flag on the "new" attr. 
+ */ + + error = xfs_attr3_leaf_flipflags(args); + if (error) + return error; + /* + * Commit the flag value change and start the next trans in series. + */ + error = xfs_trans_roll_inode(&args->trans, args->dp); + if (error) + return error; + + /* + * Dismantle the "old" attribute/value pair by removing a "remote" value + * (if it exists). + */ + xfs_attr_restore_rmt_blk(args); + + if (args->rmtblkno) { + error = xfs_attr_rmtval_invalidate(args); if (error) return error; - /* - * Dismantle the "old" attribute/value pair by removing - * a "remote" value (if it exists). - */ - args->index = args->index2; - args->blkno = args->blkno2; - args->rmtblkno = args->rmtblkno2; - args->rmtblkcnt = args->rmtblkcnt2; - args->rmtvaluelen = args->rmtvaluelen2; - if (args->rmtblkno) { - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } - - /* - * Read in the block containing the "old" attr, then - * remove the "old" attr from that block (neat, huh!) - */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, - &bp); + error = xfs_attr_rmtval_remove(args); if (error) return error; + } - xfs_attr3_leaf_remove(bp, args); + /* + * Read in the block containing the "old" attr, then remove the "old" + * attr from that block (neat, huh!) + */ + error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, + &bp); + if (error) + return error; - /* - * If the result is small enough, shrink it all into the inode. - */ - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; - } + xfs_attr3_leaf_remove(bp, args); - /* - * Commit the remove and start the next trans in series. - */ - error = xfs_trans_roll_inode(&args->trans, dp); + /* + * If the result is small enough, shrink it all into the inode. 
+ */ + forkoff = xfs_attr_shortform_allfit(bp, dp); + if (forkoff) + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + + return error; +} + +/* + * Return EEXIST if attr is found, or ENOATTR if not + */ +STATIC int +xfs_attr_leaf_hasname( + struct xfs_da_args *args, + struct xfs_buf **bp) +{ + int error = 0; + + error = xfs_attr3_leaf_read(args->trans, args->dp, 0, bp); + if (error) + return error; + + error = xfs_attr3_leaf_lookup_int(*bp, args); + if (error != -ENOATTR && error != -EEXIST) + xfs_trans_brelse(args->trans, *bp); - } else if (args->rmtblkno > 0) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - error = xfs_attr3_leaf_clearflag(args); - } return error; } @@ -772,31 +801,25 @@ xfs_attr_leaf_removename( * Remove the attribute. */ dp = args->dp; - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); - if (error) - return error; - error = xfs_attr3_leaf_lookup_int(bp, args); + error = xfs_attr_leaf_hasname(args, &bp); + if (error == -ENOATTR) { xfs_trans_brelse(args->trans, bp); return error; - } + } else if (error != -EEXIST) + return error; xfs_attr3_leaf_remove(bp, args); /* * If the result is small enough, shrink it all into the inode. 
*/ - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + forkoff = xfs_attr_shortform_allfit(bp, dp); + if (forkoff) + return xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; - } + return 0; } @@ -816,21 +839,53 @@ xfs_attr_leaf_get(xfs_da_args_t *args) trace_xfs_attr_leaf_get(args); - args->blkno = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); - if (error) - return error; + error = xfs_attr_leaf_hasname(args, &bp); - error = xfs_attr3_leaf_lookup_int(bp, args); - if (error != -EEXIST) { + if (error == -ENOATTR) { xfs_trans_brelse(args->trans, bp); return error; - } + } else if (error != -EEXIST) + return error; + + error = xfs_attr3_leaf_getvalue(bp, args); xfs_trans_brelse(args->trans, bp); return error; } +/* + * Return EEXIST if attr is found, or ENOATTR if not + * statep: If not null is set to point at the found state. Caller will + * be responsible for freeing the state in this case. + */ +STATIC int +xfs_attr_node_hasname( + struct xfs_da_args *args, + struct xfs_da_state **statep) +{ + struct xfs_da_state *state; + int retval, error; + + state = xfs_da_state_alloc(args); + if (statep != NULL) + *statep = NULL; + + /* + * Search to see if name exists, and get back a pointer to it. 
+ */ + error = xfs_da3_node_lookup_int(state, &retval); + if (error) { + xfs_da_state_free(state); + return error; + } + + if (statep != NULL) + *statep = state; + else + xfs_da_state_free(state); + return retval; +} + /*======================================================================== * External routines when attribute list size > geo->blksize *========================================================================*/ @@ -852,7 +907,6 @@ xfs_attr_node_addname( struct xfs_da_state *state; struct xfs_da_state_blk *blk; struct xfs_inode *dp; - struct xfs_mount *mp; int retval, error; trace_xfs_attr_node_addname(args); @@ -861,36 +915,28 @@ xfs_attr_node_addname( * Fill in bucket of arguments/results/context to carry around. */ dp = args->dp; - mp = dp->i_mount; restart: - state = xfs_da_state_alloc(); - state->args = args; - state->mp = mp; - /* * Search to see if name already exists, and get back a pointer * to where it should go. */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error) + retval = xfs_attr_node_hasname(args, &state); + if (retval != -ENOATTR && retval != -EEXIST) goto out; + blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { + if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE)) goto out; - } else if (retval == -EEXIST) { - if (args->flags & ATTR_CREATE) + if (retval == -EEXIST) { + if (args->attr_flags & XATTR_CREATE) goto out; trace_xfs_attr_node_replace(args); /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ - args->blkno2 = args->blkno; /* set 2nd entry info*/ - args->index2 = args->index; - args->rmtblkno2 = args->rmtblkno; - args->rmtblkcnt2 = args->rmtblkcnt; - args->rmtvaluelen2 = args->rmtvaluelen; + xfs_attr_save_rmt_blk(args); /* * clear the remote attr state now that it is saved so that the @@ -976,82 +1022,75 @@ restart: return error; } - /* - * If this 
is an atomic rename operation, we must "flip" the - * incomplete flags on the "new" and "old" attribute/value pairs - * so that one disappears and one appears atomically. Then we - * must remove the "old" attribute/value pair. - */ - if (args->op_flags & XFS_DA_OP_RENAME) { + if (!(args->op_flags & XFS_DA_OP_RENAME)) { /* - * In a separate transaction, set the incomplete flag on the - * "old" attr and clear the incomplete flag on the "new" attr. + * Added a "remote" value, just clear the incomplete flag. */ - error = xfs_attr3_leaf_flipflags(args); - if (error) - goto out; + if (args->rmtblkno > 0) + error = xfs_attr3_leaf_clearflag(args); + retval = error; + goto out; + } - /* - * Dismantle the "old" attribute/value pair by removing - * a "remote" value (if it exists). - */ - args->index = args->index2; - args->blkno = args->blkno2; - args->rmtblkno = args->rmtblkno2; - args->rmtblkcnt = args->rmtblkcnt2; - args->rmtvaluelen = args->rmtvaluelen2; - if (args->rmtblkno) { - error = xfs_attr_rmtval_remove(args); - if (error) - return error; - } + /* + * If this is an atomic rename operation, we must "flip" the incomplete + * flags on the "new" and "old" attribute/value pairs so that one + * disappears and one appears atomically. Then we must remove the "old" + * attribute/value pair. + * + * In a separate transaction, set the incomplete flag on the "old" attr + * and clear the incomplete flag on the "new" attr. + */ + error = xfs_attr3_leaf_flipflags(args); + if (error) + goto out; + /* + * Commit the flag value change and start the next trans in series + */ + error = xfs_trans_roll_inode(&args->trans, args->dp); + if (error) + goto out; - /* - * Re-find the "old" attribute entry after any split ops. - * The INCOMPLETE flag means that we will find the "old" - * attr, not the "new" one. 
- */ - args->op_flags |= XFS_DA_OP_INCOMPLETE; - state = xfs_da_state_alloc(); - state->args = args; - state->mp = mp; - state->inleaf = 0; - error = xfs_da3_node_lookup_int(state, &retval); + /* + * Dismantle the "old" attribute/value pair by removing a "remote" value + * (if it exists). + */ + xfs_attr_restore_rmt_blk(args); + + if (args->rmtblkno) { + error = xfs_attr_rmtval_invalidate(args); if (error) - goto out; + return error; - /* - * Remove the name and update the hashvals in the tree. - */ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_attr3_leaf_remove(blk->bp, args); - xfs_da3_fixhashpath(state, &state->path); + error = xfs_attr_rmtval_remove(args); + if (error) + return error; + } - /* - * Check to see if the tree needs to be collapsed. - */ - if (retval && (state->path.active > 1)) { - error = xfs_da3_join(state); - if (error) - goto out; - error = xfs_defer_finish(&args->trans); - if (error) - goto out; - } + /* + * Re-find the "old" attribute entry after any split ops. The INCOMPLETE + * flag means that we will find the "old" attr, not the "new" one. + */ + args->attr_filter |= XFS_ATTR_INCOMPLETE; + state = xfs_da_state_alloc(args); + state->inleaf = 0; + error = xfs_da3_node_lookup_int(state, &retval); + if (error) + goto out; - /* - * Commit and start the next trans in the chain. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - goto out; + /* + * Remove the name and update the hashvals in the tree. + */ + blk = &state->path.blk[state->path.active-1]; + ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); + error = xfs_attr3_leaf_remove(blk->bp, args); + xfs_da3_fixhashpath(state, &state->path); - } else if (args->rmtblkno > 0) { - /* - * Added a "remote" value, just clear the incomplete flag. - */ - error = xfs_attr3_leaf_clearflag(args); + /* + * Check to see if the tree needs to be collapsed. 
+ */ + if (retval && (state->path.active > 1)) { + error = xfs_da3_join(state); if (error) goto out; } @@ -1066,6 +1105,114 @@ out: } /* + * Shrink an attribute from leaf to shortform + */ +STATIC int +xfs_attr_node_shrink( + struct xfs_da_args *args, + struct xfs_da_state *state) +{ + struct xfs_inode *dp = args->dp; + int error, forkoff; + struct xfs_buf *bp; + + /* + * Have to get rid of the copy of this dabuf in the state. + */ + ASSERT(state->path.active == 1); + ASSERT(state->path.blk[0].bp); + state->path.blk[0].bp = NULL; + + error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); + if (error) + return error; + + forkoff = xfs_attr_shortform_allfit(bp, dp); + if (forkoff) { + error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); + /* bp is gone due to xfs_da_shrink_inode */ + } else + xfs_trans_brelse(args->trans, bp); + + return error; +} + +/* + * Mark an attribute entry INCOMPLETE and save pointers to the relevant buffers + * for later deletion of the entry. + */ +STATIC int +xfs_attr_leaf_mark_incomplete( + struct xfs_da_args *args, + struct xfs_da_state *state) +{ + int error; + + /* + * Fill in disk block numbers in the state structure + * so that we can get the buffers back after we commit + * several transactions in the following calls. + */ + error = xfs_attr_fillstate(state); + if (error) + return error; + + /* + * Mark the attribute as INCOMPLETE + */ + return xfs_attr3_leaf_setflag(args); +} + +/* + * Initial setup for xfs_attr_node_removename. Make sure the attr is there and + * the blocks are valid. Attr keys with remote blocks will be marked + * incomplete. 
+ */ +STATIC +int xfs_attr_node_removename_setup( + struct xfs_da_args *args, + struct xfs_da_state **state) +{ + int error; + + error = xfs_attr_node_hasname(args, state); + if (error != -EEXIST) + return error; + + ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL); + ASSERT((*state)->path.blk[(*state)->path.active - 1].magic == + XFS_ATTR_LEAF_MAGIC); + + if (args->rmtblkno > 0) { + error = xfs_attr_leaf_mark_incomplete(args, *state); + if (error) + return error; + + return xfs_attr_rmtval_invalidate(args); + } + + return 0; +} + +STATIC int +xfs_attr_node_remove_rmt( + struct xfs_da_args *args, + struct xfs_da_state *state) +{ + int error = 0; + + error = xfs_attr_rmtval_remove(args); + if (error) + return error; + + /* + * Refill the state structure with buffers, the prior calls released our + * buffers. + */ + return xfs_attr_refillstate(state); +} + +/* * Remove a name from a B-tree attribute list. * * This will involve walking down the Btree, and may involve joining @@ -1078,64 +1225,22 @@ xfs_attr_node_removename( { struct xfs_da_state *state; struct xfs_da_state_blk *blk; - struct xfs_inode *dp; - struct xfs_buf *bp; - int retval, error, forkoff; + int retval, error; + struct xfs_inode *dp = args->dp; trace_xfs_attr_node_removename(args); - /* - * Tie a string around our finger to remind us where we are. - */ - dp = args->dp; - state = xfs_da_state_alloc(); - state->args = args; - state->mp = dp->i_mount; - - /* - * Search to see if name exists, and get back a pointer to it. - */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error || (retval != -EEXIST)) { - if (error == 0) - error = retval; + error = xfs_attr_node_removename_setup(args, &state); + if (error) goto out; - } /* * If there is an out-of-line value, de-allocate the blocks. * This is done before we remove the attribute so that we don't * overflow the maximum size of a transaction and/or hit a deadlock. 
*/ - blk = &state->path.blk[ state->path.active-1 ]; - ASSERT(blk->bp != NULL); - ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); if (args->rmtblkno > 0) { - /* - * Fill in disk block numbers in the state structure - * so that we can get the buffers back after we commit - * several transactions in the following calls. - */ - error = xfs_attr_fillstate(state); - if (error) - goto out; - - /* - * Mark the attribute as INCOMPLETE, then bunmapi() the - * remote value. - */ - error = xfs_attr3_leaf_setflag(args); - if (error) - goto out; - error = xfs_attr_rmtval_remove(args); - if (error) - goto out; - - /* - * Refill the state structure with buffers, the prior calls - * released our buffers. - */ - error = xfs_attr_refillstate(state); + error = xfs_attr_node_remove_rmt(args, state); if (error) goto out; } @@ -1169,33 +1274,12 @@ xfs_attr_node_removename( /* * If the result is small enough, push it all into the inode. */ - if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { - /* - * Have to get rid of the copy of this dabuf in the state. - */ - ASSERT(state->path.active == 1); - ASSERT(state->path.blk[0].bp); - state->path.blk[0].bp = NULL; - - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); - if (error) - goto out; - - if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { - error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); - /* bp is gone due to xfs_da_shrink_inode */ - if (error) - goto out; - error = xfs_defer_finish(&args->trans); - if (error) - goto out; - } else - xfs_trans_brelse(args->trans, bp); - } - error = 0; + if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) + error = xfs_attr_node_shrink(args, state); out: - xfs_da_state_free(state); + if (state) + xfs_da_state_free(state); return error; } @@ -1311,47 +1395,41 @@ xfs_attr_refillstate(xfs_da_state_t *state) * Returns 0 on successful retrieval, otherwise an error. 
*/ STATIC int -xfs_attr_node_get(xfs_da_args_t *args) +xfs_attr_node_get( + struct xfs_da_args *args) { - xfs_da_state_t *state; - xfs_da_state_blk_t *blk; - int error, retval; - int i; + struct xfs_da_state *state; + struct xfs_da_state_blk *blk; + int i; + int error; trace_xfs_attr_node_get(args); - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; - /* * Search to see if name exists, and get back a pointer to it. */ - error = xfs_da3_node_lookup_int(state, &retval); - if (error) { - retval = error; - goto out_release; - } - if (retval != -EEXIST) + error = xfs_attr_node_hasname(args, &state); + if (error != -EEXIST) goto out_release; /* * Get the value, local or "remote" */ blk = &state->path.blk[state->path.active - 1]; - retval = xfs_attr3_leaf_getvalue(blk->bp, args); + error = xfs_attr3_leaf_getvalue(blk->bp, args); /* * If not in a transaction, we have to release all the buffers. */ out_release: - for (i = 0; i < state->path.active; i++) { + for (i = 0; state != NULL && i < state->path.active; i++) { xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } - xfs_da_state_free(state); - return retval; + if (state) + xfs_da_state_free(state); + return error; } /* Returns true if the attribute entry name is valid. */ diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 4243b2272642..3e97a935e712 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc. * All Rights Reserved. @@ -21,39 +21,6 @@ struct xfs_attr_list_context; * as possible so as to fit into the literal area of the inode. 
*/ -/*======================================================================== - * External interfaces - *========================================================================*/ - - -#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */ -#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ -#define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ -#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ -#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ -#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ - -#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ -#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ - -#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ -#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */ - -#define ATTR_KERNEL_FLAGS \ - (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC) - -#define XFS_ATTR_FLAGS \ - { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ - { ATTR_ROOT, "ROOT" }, \ - { ATTR_TRUST, "TRUST" }, \ - { ATTR_SECURE, "SECURE" }, \ - { ATTR_CREATE, "CREATE" }, \ - { ATTR_REPLACE, "REPLACE" }, \ - { ATTR_KERNOTIME, "KERNOTIME" }, \ - { ATTR_KERNOVAL, "KERNOVAL" }, \ - { ATTR_INCOMPLETE, "INCOMPLETE" }, \ - { ATTR_ALLOC, "ALLOC" } - /* * The maximum size (into the kernel or returned from the kernel) of an * attribute value or the buffer used for an attr_list() call. Larger @@ -62,45 +29,16 @@ struct xfs_attr_list_context; #define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */ /* - * Define how lists of attribute names are returned to the user from - * the attr_list() call. A large, 32bit aligned, buffer is passed in - * along with its size. We put an array of offsets at the top that each - * reference an attrlist_ent_t and pack the attrlist_ent_t's at the bottom. 
- */ -typedef struct attrlist { - __s32 al_count; /* number of entries in attrlist */ - __s32 al_more; /* T/F: more attrs (do call again) */ - __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */ -} attrlist_t; - -/* - * Show the interesting info about one attribute. This is what the - * al_offset[i] entry points to. - */ -typedef struct attrlist_ent { /* data from attr_list() */ - __u32 a_valuelen; /* number bytes in value of attr */ - char a_name[1]; /* attr name (NULL terminated) */ -} attrlist_ent_t; - -/* - * Given a pointer to the (char*) buffer containing the attr_list() result, - * and an index, return a pointer to the indicated attribute in the buffer. - */ -#define ATTR_ENTRY(buffer, index) \ - ((attrlist_ent_t *) \ - &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) - -/* * Kernel-internal version of the attrlist cursor. */ -typedef struct attrlist_cursor_kern { +struct xfs_attrlist_cursor_kern { __u32 hashval; /* hash value of next entry to add */ __u32 blkno; /* block containing entry (suggestion) */ __u32 offset; /* offset in list of equal-hashvals */ __u16 pad1; /* padding to match user-level */ __u8 pad2; /* padding to match user-level */ __u8 initted; /* T/F: cursor has been initialized */ -} attrlist_cursor_kern_t; +}; /*======================================================================== @@ -112,27 +50,28 @@ typedef struct attrlist_cursor_kern { typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int, unsigned char *, int, int); -typedef struct xfs_attr_list_context { - struct xfs_trans *tp; - struct xfs_inode *dp; /* inode */ - struct attrlist_cursor_kern *cursor; /* position in list */ - char *alist; /* output buffer */ +struct xfs_attr_list_context { + struct xfs_trans *tp; + struct xfs_inode *dp; /* inode */ + struct xfs_attrlist_cursor_kern cursor; /* position in list */ + void *buffer; /* output buffer */ /* * Abort attribute list iteration if non-zero. 
Can be used to pass * error values to the xfs_attr_list caller. */ - int seen_enough; + int seen_enough; + bool allow_incomplete; - ssize_t count; /* num used entries */ - int dupcnt; /* count dup hashvals seen */ - int bufsize; /* total buffer size */ - int firstu; /* first used byte in buffer */ - int flags; /* from VOP call */ - int resynch; /* T/F: resynch with cursor */ - put_listent_func_t put_listent; /* list output fmt function */ - int index; /* index into output buffer */ -} xfs_attr_list_context_t; + ssize_t count; /* num used entries */ + int dupcnt; /* count dup hashvals seen */ + int bufsize; /* total buffer size */ + int firstu; /* first used byte in buffer */ + unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE} */ + int resynch; /* T/F: resynch with cursor */ + put_listent_func_t put_listent; /* list output fmt function */ + int index; /* index into output buffer */ +}; /*======================================================================== @@ -143,21 +82,15 @@ typedef struct xfs_attr_list_context { * Overall external interface routines. 
*/ int xfs_attr_inactive(struct xfs_inode *dp); -int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *); -int xfs_attr_list_int(struct xfs_attr_list_context *); +int xfs_attr_list_ilocked(struct xfs_attr_list_context *); +int xfs_attr_list(struct xfs_attr_list_context *); int xfs_inode_hasattr(struct xfs_inode *ip); -int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); -int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, - size_t namelen, unsigned char **value, int *valuelenp, - int flags); -int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, - size_t namelen, unsigned char *value, int valuelen, int flags); +int xfs_attr_get_ilocked(struct xfs_da_args *args); +int xfs_attr_get(struct xfs_da_args *args); +int xfs_attr_set(struct xfs_da_args *args); int xfs_attr_set_args(struct xfs_da_args *args); -int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, - size_t namelen, int flags); +int xfs_has_attr(struct xfs_da_args *args); int xfs_attr_remove_args(struct xfs_da_args *args); -int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, - int flags, struct attrlist_cursor_kern *cursor); bool xfs_attr_namecheck(const void *name, size_t length); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index fed537a4353d..8623c815164a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -309,14 +309,6 @@ xfs_attr3_leaf_verify( return fa; /* - * In recovery there is a transient state where count == 0 is valid - * because we may have transitioned an empty shortform attr to a leaf - * if the attr didn't fit in shortform. - */ - if (!xfs_log_in_recovery(mp) && ichdr.count == 0) - return __this_address; - - /* * firstused is the block offset of the first name info structure. * Make sure it doesn't go off the block or crash into the header. 
*/ @@ -331,6 +323,13 @@ xfs_attr3_leaf_verify( (char *)bp->b_addr + ichdr.firstused) return __this_address; + /* + * NOTE: This verifier historically failed empty leaf buffers because + * we expect the fork to be in another format. Empty attr fork format + * conversions are possible during xattr set, however, and format + * conversion is not atomic with the xattr set that triggers it. We + * cannot assume leaf blocks are non-empty until that is addressed. + */ buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; for (i = 0, ent = entries; i < ichdr.count; ent++, i++) { fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr, @@ -445,14 +444,25 @@ xfs_attr3_leaf_read( * Namespace helper routines *========================================================================*/ -/* - * If namespace bits don't match return 0. - * If all match then return 1. - */ -STATIC int -xfs_attr_namesp_match(int arg_flags, int ondisk_flags) +static bool +xfs_attr_match( + struct xfs_da_args *args, + uint8_t namelen, + unsigned char *name, + int flags) { - return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); + if (args->namelen != namelen) + return false; + if (memcmp(args->name, name, namelen) != 0) + return false; + /* + * If we are looking for incomplete entries, show only those, else only + * show complete entries. 
+ */ + if (args->attr_filter != + (flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE))) + return false; + return true; } static int @@ -464,7 +474,7 @@ xfs_attr_copy_value( /* * No copy if all we have to do is get the length */ - if (args->flags & ATTR_KERNOVAL) { + if (!args->valuelen) { args->valuelen = valuelen; return 0; } @@ -477,8 +487,8 @@ xfs_attr_copy_value( return -ERANGE; } - if (args->op_flags & XFS_DA_OP_ALLOCVAL) { - args->value = kmem_alloc_large(valuelen, 0); + if (!args->value) { + args->value = kmem_alloc_large(valuelen, KM_NOLOCKDEP); if (!args->value) return -ENOMEM; } @@ -526,9 +536,9 @@ xfs_attr_shortform_bytesfit( int offset; /* rounded down */ - offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3; + offset = (XFS_LITINO(mp) - bytes) >> 3; - if (dp->i_d.di_format == XFS_DINODE_FMT_DEV) { + if (dp->i_df.if_format == XFS_DINODE_FMT_DEV) { minforkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; return (offset >= minforkoff) ? minforkoff : 0; } @@ -556,7 +566,7 @@ xfs_attr_shortform_bytesfit( dsize = dp->i_df.if_bytes; - switch (dp->i_d.di_format) { + switch (dp->i_df.if_format) { case XFS_DINODE_FMT_EXTENTS: /* * If there is no attr fork and the data fork is extents, @@ -593,8 +603,7 @@ xfs_attr_shortform_bytesfit( minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp, dp->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ if (offset >= maxforkoff) @@ -626,22 +635,19 @@ xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp) * Create the initial contents of a shortform attribute list. 
*/ void -xfs_attr_shortform_create(xfs_da_args_t *args) +xfs_attr_shortform_create( + struct xfs_da_args *args) { - xfs_attr_sf_hdr_t *hdr; - xfs_inode_t *dp; - struct xfs_ifork *ifp; + struct xfs_inode *dp = args->dp; + struct xfs_ifork *ifp = dp->i_afp; + struct xfs_attr_sf_hdr *hdr; trace_xfs_attr_sf_create(args); - dp = args->dp; - ASSERT(dp != NULL); - ifp = dp->i_afp; - ASSERT(ifp != NULL); ASSERT(ifp->if_bytes == 0); - if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) { + if (ifp->if_format == XFS_DINODE_FMT_EXTENTS) { ifp->if_flags &= ~XFS_IFEXTENTS; /* just in case */ - dp->i_d.di_aformat = XFS_DINODE_FMT_LOCAL; + ifp->if_format = XFS_DINODE_FMT_LOCAL; ifp->if_flags |= XFS_IFINLINE; } else { ASSERT(ifp->if_flags & XFS_IFINLINE); @@ -654,18 +660,65 @@ xfs_attr_shortform_create(xfs_da_args_t *args) } /* + * Return -EEXIST if attr is found, or -ENOATTR if not + * args: args containing attribute name and namelen + * sfep: If not null, pointer will be set to the last attr entry found on + -EEXIST. On -ENOATTR pointer is left at the last entry in the list + * basep: If not null, pointer is set to the byte offset of the entry in the + * list on -EEXIST. 
On -ENOATTR, pointer is left at the byte offset of + * the last entry in the list + */ +int +xfs_attr_sf_findname( + struct xfs_da_args *args, + struct xfs_attr_sf_entry **sfep, + unsigned int *basep) +{ + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + unsigned int base = sizeof(struct xfs_attr_sf_hdr); + int size = 0; + int end; + int i; + + sf = (struct xfs_attr_shortform *)args->dp->i_afp->if_u1.if_data; + sfe = &sf->list[0]; + end = sf->hdr.count; + for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), + base += size, i++) { + size = XFS_ATTR_SF_ENTSIZE(sfe); + if (!xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + continue; + break; + } + + if (sfep != NULL) + *sfep = sfe; + + if (basep != NULL) + *basep = base; + + if (i == end) + return -ENOATTR; + return -EEXIST; +} + +/* * Add a name/value pair to the shortform attribute list. * Overflow from the inode has already been checked for. */ void -xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) +xfs_attr_shortform_add( + struct xfs_da_args *args, + int forkoff) { - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int i, offset, size; - xfs_mount_t *mp; - xfs_inode_t *dp; - struct xfs_ifork *ifp; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + int offset, size; + struct xfs_mount *mp; + struct xfs_inode *dp; + struct xfs_ifork *ifp; trace_xfs_attr_sf_add(args); @@ -676,18 +729,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) ifp = dp->i_afp; ASSERT(ifp->if_flags & XFS_IFINLINE); sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { -#ifdef DEBUG - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; + if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST) ASSERT(0); -#endif - } offset = (char *)sfe - (char 
*)sf; size = XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); @@ -697,7 +740,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; - sfe->flags = XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + sfe->flags = args->attr_filter; memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); sf->hdr.count++; @@ -716,13 +759,12 @@ xfs_attr_fork_remove( struct xfs_inode *ip, struct xfs_trans *tp) { - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - ip->i_d.di_forkoff = 0; - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; - - ASSERT(ip->i_d.di_anextents == 0); - ASSERT(ip->i_afp == NULL); + ASSERT(ip->i_afp->if_nextents == 0); + xfs_idestroy_fork(ip->i_afp); + kmem_cache_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + ip->i_d.di_forkoff = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -730,35 +772,27 @@ xfs_attr_fork_remove( * Remove an attribute from the shortform attribute list structure. 
*/ int -xfs_attr_shortform_remove(xfs_da_args_t *args) +xfs_attr_shortform_remove( + struct xfs_da_args *args) { - xfs_attr_shortform_t *sf; - xfs_attr_sf_entry_t *sfe; - int base, size=0, end, totsize, i; - xfs_mount_t *mp; - xfs_inode_t *dp; + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + int size = 0, end, totsize; + unsigned int base; + struct xfs_mount *mp; + struct xfs_inode *dp; + int error; trace_xfs_attr_sf_remove(args); dp = args->dp; mp = dp->i_mount; - base = sizeof(xfs_attr_sf_hdr_t); sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; - sfe = &sf->list[0]; - end = sf->hdr.count; - for (i = 0; i < end; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), - base += size, i++) { - size = XFS_ATTR_SF_ENTSIZE(sfe); - if (sfe->namelen != args->namelen) - continue; - if (memcmp(sfe->nameval, args->name, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - break; - } - if (i == end) - return -ENOATTR; + + error = xfs_attr_sf_findname(args, &sfe, &base); + if (error != -EEXIST) + return error; + size = XFS_ATTR_SF_ENTSIZE(sfe); /* * Fix up the attribute fork data, covering the hole @@ -776,7 +810,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) totsize -= size; if (totsize == sizeof(xfs_attr_sf_hdr_t) && (mp->m_flags & XFS_MOUNT_ATTR2) && - (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && + (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && !(args->op_flags & XFS_DA_OP_ADDNAME)) { xfs_attr_fork_remove(dp, args->trans); } else { @@ -786,7 +820,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || (args->op_flags & XFS_DA_OP_ADDNAME) || !(mp->m_flags & XFS_MOUNT_ATTR2) || - dp->i_d.di_format == XFS_DINODE_FMT_BTREE); + dp->i_df.if_format == XFS_DINODE_FMT_BTREE); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); } @@ -816,13 +850,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; sfe = 
XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - return -EEXIST; + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + return -EEXIST; } return -ENOATTR; } @@ -830,9 +860,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) /* * Retrieve the attribute value and length. * - * If ATTR_KERNOVAL is specified, only the length needs to be returned. - * Unlike a lookup, we only return an error if the attribute does not - * exist or we can't retrieve the value. + * If args->valuelen is zero, only the length needs to be returned. Unlike a + * lookup, we only return an error if the attribute does not exist or we can't + * retrieve the value. */ int xfs_attr_shortform_getvalue( @@ -847,14 +877,10 @@ xfs_attr_shortform_getvalue( sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { - if (sfe->namelen != args->namelen) - continue; - if (memcmp(args->name, sfe->nameval, args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, sfe->flags)) - continue; - return xfs_attr_copy_value(args, &sfe->nameval[args->namelen], - sfe->valuelen); + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + return xfs_attr_copy_value(args, + &sfe->nameval[args->namelen], sfe->valuelen); } return -ENOATTR; } @@ -918,7 +944,7 @@ xfs_attr_shortform_to_leaf( nargs.valuelen = sfe->valuelen; nargs.hashval = xfs_da_hashname(sfe->nameval, sfe->namelen); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); + nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK; error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); @@ -971,7 +997,7 @@ xfs_attr_shortform_allfit( + be16_to_cpu(name_loc->valuelen); } if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) && - 
(dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && + (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && (bytes == sizeof(struct xfs_attr_sf_hdr))) return -1; return xfs_attr_shortform_bytesfit(dp, bytes); @@ -990,7 +1016,7 @@ xfs_attr_shortform_verify( int i; int64_t size; - ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL); + ASSERT(ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL); ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; size = ifp->if_bytes; @@ -1094,7 +1120,7 @@ xfs_attr3_leaf_to_shortform( if (forkoff == -1) { ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); - ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); + ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE); xfs_attr_fork_remove(dp, args->trans); goto out; } @@ -1124,7 +1150,7 @@ xfs_attr3_leaf_to_shortform( nargs.value = &name_loc->nameval[nargs.namelen]; nargs.valuelen = be16_to_cpu(name_loc->valuelen); nargs.hashval = be32_to_cpu(entry->hashval); - nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(entry->flags); + nargs.attr_filter = entry->flags & XFS_ATTR_NSP_ONDISK_MASK; xfs_attr_shortform_add(&nargs, forkoff); } error = 0; @@ -1449,8 +1475,9 @@ xfs_attr3_leaf_add_work( entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base + ichdr->freemap[mapindex].size); entry->hashval = cpu_to_be32(args->hashval); - entry->flags = tmp ? 
XFS_ATTR_LOCAL : 0; - entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); + entry->flags = args->attr_filter; + if (tmp) + entry->flags |= XFS_ATTR_LOCAL; if (args->op_flags & XFS_DA_OP_RENAME) { entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && @@ -2346,7 +2373,7 @@ xfs_attr3_leaf_lookup_int( xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); if (ichdr.count >= args->geo->blksize / 8) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -2365,11 +2392,11 @@ xfs_attr3_leaf_lookup_int( break; } if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -2399,33 +2426,17 @@ xfs_attr3_leaf_lookup_int( /* * GROT: Add code to remove incomplete entries. */ - /* - * If we are looking for INCOMPLETE entries, show only those. - * If we are looking for complete entries, show only those. 
- */ - if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) != - !!(entry->flags & XFS_ATTR_INCOMPLETE)) { - continue; - } if (entry->flags & XFS_ATTR_LOCAL) { name_loc = xfs_attr3_leaf_name_local(leaf, probe); - if (name_loc->namelen != args->namelen) - continue; - if (memcmp(args->name, name_loc->nameval, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) + if (!xfs_attr_match(args, name_loc->namelen, + name_loc->nameval, entry->flags)) continue; args->index = probe; return -EEXIST; } else { name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); - if (name_rmt->namelen != args->namelen) - continue; - if (memcmp(args->name, name_rmt->name, - args->namelen) != 0) - continue; - if (!xfs_attr_namesp_match(args->flags, entry->flags)) + if (!xfs_attr_match(args, name_rmt->namelen, + name_rmt->name, entry->flags)) continue; args->index = probe; args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); @@ -2444,9 +2455,9 @@ xfs_attr3_leaf_lookup_int( * Get the value associated with an attribute name from a leaf attribute * list structure. * - * If ATTR_KERNOVAL is specified, only the length needs to be returned. - * Unlike a lookup, we only return an error if the attribute does not - * exist or we can't retrieve the value. + * If args->valuelen is zero, only the length needs to be returned. Unlike a + * lookup, we only return an error if the attribute does not exist or we can't + * retrieve the value. */ int xfs_attr3_leaf_getvalue( @@ -2771,10 +2782,7 @@ xfs_attr3_leaf_clearflag( XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - /* - * Commit the flag value change and start the next trans in series. - */ - return xfs_trans_roll_inode(&args->trans, args->dp); + return 0; } /* @@ -2822,10 +2830,7 @@ xfs_attr3_leaf_setflag( XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } - /* - * Commit the flag value change and start the next trans in series. 
- */ - return xfs_trans_roll_inode(&args->trans, args->dp); + return 0; } /* @@ -2940,10 +2945,5 @@ xfs_attr3_leaf_flipflags( XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt))); } - /* - * Commit the flag value change and start the next trans in series. - */ - error = xfs_trans_roll_inode(&args->trans, args->dp); - - return error; + return 0; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 73615b1dd1a8..9b1c59f40a26 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002-2003,2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. @@ -8,7 +8,6 @@ #define __XFS_ATTR_LEAF_H__ struct attrlist; -struct attrlist_cursor_kern; struct xfs_attr_list_context; struct xfs_da_args; struct xfs_da_state; @@ -53,6 +52,9 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); +int xfs_attr_sf_findname(struct xfs_da_args *args, + struct xfs_attr_sf_entry **sfep, + unsigned int *basep); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 8b7f74b3bea2..3f80cede7406 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -397,7 +397,7 @@ xfs_attr_rmtval_get( trace_xfs_attr_rmtval_get(args); - ASSERT(!(args->flags & ATTR_KERNOVAL)); + ASSERT(args->valuelen != 0); ASSERT(args->rmtvaluelen == args->valuelen); valuelen = args->rmtvaluelen; @@ -440,32 +440,23 @@ xfs_attr_rmtval_get( } /* - * Write the value associated with an attribute into the out-of-line buffer - * that we have defined for it. 
+ * Find a "hole" in the attribute address space large enough for us to drop the + * new attribute's value into */ -int -xfs_attr_rmtval_set( +STATIC int +xfs_attr_rmt_find_hole( struct xfs_da_args *args) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; - struct xfs_bmbt_irec map; - xfs_dablk_t lblkno; - xfs_fileoff_t lfileoff = 0; - uint8_t *src = args->value; - int blkcnt; - int valuelen; - int nmap; int error; - int offset = 0; - - trace_xfs_attr_rmtval_set(args); + int blkcnt; + xfs_fileoff_t lfileoff = 0; /* - * Find a "hole" in the attribute address space large enough for - * us to drop the new attribute's value into. Because CRC enable - * attributes have headers, we can't just do a straight byte to FSB - * conversion and have to take the header space into account. + * Because CRC enable attributes have headers, we can't just do a + * straight byte to FSB conversion and have to take the header space + * into account. */ blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, @@ -473,48 +464,26 @@ xfs_attr_rmtval_set( if (error) return error; - args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; + args->rmtblkno = (xfs_dablk_t)lfileoff; args->rmtblkcnt = blkcnt; - /* - * Roll through the "value", allocating blocks on disk as required. - */ - while (blkcnt > 0) { - /* - * Allocate a single extent, up to the size of the value. - * - * Note that we have to consider this a data allocation as we - * write the remote attribute without logging the contents. - * Hence we must ensure that we aren't using blocks that are on - * the busy list so that we don't overwrite blocks which have - * recently been freed but their transactions are not yet - * committed to disk. If we overwrite the contents of a busy - * extent and then crash then the block may not contain the - * correct metadata after log recovery occurs. 
- */ - nmap = 1; - error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, - blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, - &nmap); - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; - - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - lblkno += map.br_blockcount; - blkcnt -= map.br_blockcount; + return 0; +} - /* - * Start the next trans in the chain. - */ - error = xfs_trans_roll_inode(&args->trans, dp); - if (error) - return error; - } +STATIC int +xfs_attr_rmtval_set_value( + struct xfs_da_args *args) +{ + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_bmbt_irec map; + xfs_dablk_t lblkno; + uint8_t *src = args->value; + int blkcnt; + int valuelen; + int nmap; + int error; + int offset = 0; /* * Roll through the "value", copying the attribute value to the @@ -595,19 +564,82 @@ xfs_attr_rmtval_stale( } /* + * Write the value associated with an attribute into the out-of-line buffer + * that we have defined for it. + */ +int +xfs_attr_rmtval_set( + struct xfs_da_args *args) +{ + struct xfs_inode *dp = args->dp; + struct xfs_bmbt_irec map; + xfs_dablk_t lblkno; + int blkcnt; + int nmap; + int error; + + trace_xfs_attr_rmtval_set(args); + + error = xfs_attr_rmt_find_hole(args); + if (error) + return error; + + blkcnt = args->rmtblkcnt; + lblkno = (xfs_dablk_t)args->rmtblkno; + /* + * Roll through the "value", allocating blocks on disk as required. + */ + while (blkcnt > 0) { + /* + * Allocate a single extent, up to the size of the value. + * + * Note that we have to consider this a data allocation as we + * write the remote attribute without logging the contents. + * Hence we must ensure that we aren't using blocks that are on + * the busy list so that we don't overwrite blocks which have + * recently been freed but their transactions are not yet + * committed to disk. 
If we overwrite the contents of a busy + * extent and then crash then the block may not contain the + * correct metadata after log recovery occurs. + */ + nmap = 1; + error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, + blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, + &nmap); + if (error) + return error; + error = xfs_defer_finish(&args->trans); + if (error) + return error; + + ASSERT(nmap == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + lblkno += map.br_blockcount; + blkcnt -= map.br_blockcount; + + /* + * Start the next trans in the chain. + */ + error = xfs_trans_roll_inode(&args->trans, dp); + if (error) + return error; + } + + return xfs_attr_rmtval_set_value(args); +} + +/* * Remove the value associated with an attribute by deleting the * out-of-line buffer that it is stored on. */ int -xfs_attr_rmtval_remove( +xfs_attr_rmtval_invalidate( struct xfs_da_args *args) { xfs_dablk_t lblkno; int blkcnt; int error; - int done; - - trace_xfs_attr_rmtval_remove(args); /* * Roll through the "value", invalidating the attribute value's blocks. @@ -635,21 +667,29 @@ xfs_attr_rmtval_remove( lblkno += map.br_blockcount; blkcnt -= map.br_blockcount; } + return 0; +} + +/* + * Remove the value associated with an attribute by deleting the + * out-of-line buffer that it is stored on. + */ +int +xfs_attr_rmtval_remove( + struct xfs_da_args *args) +{ + int error; + int retval; + + trace_xfs_attr_rmtval_remove(args); /* * Keep de-allocating extents until the remote-value region is gone. 
*/ - lblkno = args->rmtblkno; - blkcnt = args->rmtblkcnt; - done = 0; - while (!done) { - error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, - XFS_BMAPI_ATTRFORK, 1, &done); - if (error) - return error; - error = xfs_defer_finish(&args->trans); - if (error) - return error; + do { + retval = __xfs_attr_rmtval_remove(args); + if (retval && retval != -EAGAIN) + return retval; /* * Close out trans and start the next one in the chain. @@ -657,6 +697,36 @@ xfs_attr_rmtval_remove( error = xfs_trans_roll_inode(&args->trans, args->dp); if (error) return error; - } + } while (retval == -EAGAIN); + return 0; } + +/* + * Remove the value associated with an attribute by deleting the out-of-line + * buffer that it is stored on. Returns EAGAIN for the caller to refresh the + * transaction and re-call the function + */ +int +__xfs_attr_rmtval_remove( + struct xfs_da_args *args) +{ + int error, done; + + /* + * Unmap value blocks for this attr. + */ + error = xfs_bunmapi(args->trans, args->dp, args->rmtblkno, + args->rmtblkcnt, XFS_BMAPI_ATTRFORK, 1, &done); + if (error) + return error; + + error = xfs_defer_finish(&args->trans); + if (error) + return error; + + if (!done) + return -EAGAIN; + + return error; +} diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 6fb4572845ce..9eee615da156 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2013 Red Hat, Inc. * All Rights Reserved. 
@@ -13,5 +13,6 @@ int xfs_attr_rmtval_set(struct xfs_da_args *args); int xfs_attr_rmtval_remove(struct xfs_da_args *args); int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, xfs_buf_flags_t incore_flags); - +int xfs_attr_rmtval_invalidate(struct xfs_da_args *args); +int __xfs_attr_rmtval_remove(struct xfs_da_args *args); #endif /* __XFS_ATTR_REMOTE_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index aafa4fe70624..bb004fb7944a 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. * All Rights Reserved. diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h index 99017b8df292..a04f266ae644 100644 --- a/fs/xfs/libxfs/xfs_bit.h +++ b/fs/xfs/libxfs/xfs_bit.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. * All Rights Reserved. diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 9a6d7a84689a..9c40d5971035 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -61,10 +61,10 @@ xfs_bmap_compute_maxlevels( int sz; /* root block size */ /* - * The maximum number of extents in a file, hence the maximum - * number of leaf entries, is controlled by the type of di_nextents - * (a signed 32-bit number, xfs_extnum_t), or by di_anextents - * (a signed 16-bit number, xfs_aextnum_t). + * The maximum number of extents in a file, hence the maximum number of + * leaf entries, is controlled by the size of the on-disk extent count, + * either a signed 32-bit number for the data fork, or a signed 16-bit + * number for the attr fork. 
* * Note that we can no longer assume that if we are in ATTR1 that * the fork offset of all the inodes will be @@ -120,10 +120,11 @@ xfs_bmbt_lookup_first( */ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + return whichfork != XFS_COW_FORK && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) > - XFS_IFORK_MAXEXT(ip, whichfork); + ifp->if_format == XFS_DINODE_FMT_EXTENTS && + ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork); } /* @@ -131,10 +132,11 @@ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) */ static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + return whichfork != XFS_COW_FORK && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && - XFS_IFORK_NEXTENTS(ip, whichfork) <= - XFS_IFORK_MAXEXT(ip, whichfork); + ifp->if_format == XFS_DINODE_FMT_BTREE && + ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork); } /* @@ -193,14 +195,12 @@ xfs_default_attroffset( struct xfs_mount *mp = ip->i_mount; uint offset; - if (mp->m_sb.sb_inodesize == 256) { - offset = XFS_LITINO(mp, ip->i_d.di_version) - - XFS_BMDR_SPACE_CALC(MINABTPTRS); - } else { + if (mp->m_sb.sb_inodesize == 256) + offset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); + else offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); - } - ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); + ASSERT(offset < XFS_LITINO(mp)); return offset; } @@ -215,8 +215,8 @@ xfs_bmap_forkoff_reset( int whichfork) { if (whichfork == XFS_ATTR_FORK && - ip->i_d.di_format != XFS_DINODE_FMT_DEV && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { + ip->i_df.if_format != XFS_DINODE_FMT_DEV && + ip->i_df.if_format != XFS_DINODE_FMT_BTREE) { uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; if (dfl_forkoff > ip->i_d.di_forkoff) @@ -317,31 +317,28 @@ xfs_bmap_check_leaf_extents( 
xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ - struct xfs_ifork *ifp; /* fork structure */ int level; /* btree level, for checking */ - xfs_mount_t *mp; /* file system mount structure */ __be64 *pp; /* pointer to block address */ xfs_bmbt_rec_t *ep; /* pointer to current extent */ xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ xfs_bmbt_rec_t *nextp; /* pointer to next extent */ int bp_release = 0; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { + if (ifp->if_format != XFS_DINODE_FMT_BTREE) return; - } /* skip large extent count inodes */ - if (ip->i_d.di_nextents > 10000) + if (ip->i_df.if_nextents > 10000) return; bno = NULLFSBLOCK; - mp = ip->i_mount; - ifp = XFS_IFORK_PTR(ip, whichfork); block = ifp->if_broot; /* * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 
@@ -556,7 +553,8 @@ __xfs_bmap_add_free( #endif ASSERT(xfs_bmap_free_item_zone != NULL); - new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); + new = kmem_cache_alloc(xfs_bmap_free_item_zone, + GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = bno; new->xefi_blockcount = (xfs_extlen_t)len; if (oinfo) @@ -606,7 +604,7 @@ xfs_bmap_btree_to_extents( ASSERT(cur); ASSERT(whichfork != XFS_COW_FORK); ASSERT(ifp->if_flags & XFS_IFEXTENTS); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); @@ -634,7 +632,7 @@ xfs_bmap_btree_to_extents( xfs_iroot_realloc(ip, -1, whichfork); ASSERT(ifp->if_broot == NULL); ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; *logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork); return 0; } @@ -670,7 +668,7 @@ xfs_bmap_extents_to_btree( mp = ip->i_mount; ASSERT(whichfork != XFS_COW_FORK); ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); + ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS); /* * Make space in the inode incore. This needs to be undone if we fail @@ -690,11 +688,11 @@ xfs_bmap_extents_to_btree( * Need a cursor. Can't allocate until bb_level is filled in. */ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; /* * Convert to a btree with two levels, one record in root. 
*/ - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); + ifp->if_format = XFS_DINODE_FMT_BTREE; memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = mp; @@ -727,7 +725,7 @@ xfs_bmap_extents_to_btree( ASSERT(tp->t_firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock)); tp->t_firstblock = args.fsbno; - cur->bc_private.b.allocated++; + cur->bc_ino.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, @@ -752,7 +750,7 @@ xfs_bmap_extents_to_btree( xfs_bmbt_disk_set_all(arp, &rec); cnt++; } - ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); + ASSERT(cnt == ifp->if_nextents); xfs_btree_set_numrecs(ablock, cnt); /* @@ -780,7 +778,7 @@ out_unreserve_dquot: xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); out_root_realloc: xfs_iroot_realloc(ip, -1, whichfork); - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; ASSERT(ifp->if_broot == NULL); xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); @@ -802,16 +800,16 @@ xfs_bmap_local_to_extents_empty( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(whichfork != XFS_COW_FORK); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); ASSERT(ifp->if_bytes == 0); - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); + ASSERT(ifp->if_nextents == 0); xfs_bmap_forkoff_reset(ip, whichfork); ifp->if_flags &= ~XFS_IFINLINE; ifp->if_flags |= XFS_IFEXTENTS; ifp->if_u1.if_root = NULL; ifp->if_height = 0; - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } @@ -842,7 +840,7 @@ xfs_bmap_local_to_extents( */ ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK)); ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format == 
XFS_DINODE_FMT_LOCAL); if (!ifp->if_bytes) { xfs_bmap_local_to_extents_empty(tp, ip, whichfork); @@ -909,7 +907,7 @@ xfs_bmap_local_to_extents( xfs_iext_first(ifp, &icur); xfs_iext_insert(ip, &icur, &rec, 0); - XFS_IFORK_NEXT_SET(ip, whichfork, 1); + ifp->if_nextents = 1; ip->i_d.di_nblocks = 1; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); @@ -953,7 +951,7 @@ xfs_bmap_add_attrfork_btree( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return -ENOSPC; } - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); } return 0; @@ -974,13 +972,14 @@ xfs_bmap_add_attrfork_extents( xfs_btree_cur_t *cur; /* bmap btree cursor */ int error; /* error return value */ - if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) + if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <= + XFS_IFORK_DSIZE(ip)) return 0; cur = NULL; error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags, XFS_DATA_FORK); if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -1035,7 +1034,7 @@ xfs_bmap_set_attrforkoff( int size, int *version) { - switch (ip->i_d.di_format) { + switch (ip->i_df.if_format) { case XFS_DINODE_FMT_DEV: ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3; break; @@ -1093,17 +1092,6 @@ xfs_bmap_add_attrfork( goto trans_cancel; if (XFS_IFORK_Q(ip)) goto trans_cancel; - if (XFS_IS_CORRUPT(mp, ip->i_d.di_anextents != 0)) { - error = -EFSCORRUPTED; - goto trans_cancel; - } - if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { - /* - * For inodes coming from pre-6.2 filesystems. 
- */ - ASSERT(ip->i_d.di_aformat == 0); - ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; - } xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -1111,10 +1099,14 @@ xfs_bmap_add_attrfork( if (error) goto trans_cancel; ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0); + + ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone, + GFP_KERNEL | __GFP_NOFAIL); + + ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS; ip->i_afp->if_flags = XFS_IFEXTENTS; logflags = 0; - switch (ip->i_d.di_format) { + switch (ip->i_df.if_format) { case XFS_DINODE_FMT_LOCAL: error = xfs_bmap_add_attrfork_local(tp, ip, &logflags); break; @@ -1178,20 +1170,20 @@ xfs_iread_bmbt_block( { struct xfs_iread_state *ir = priv; struct xfs_mount *mp = cur->bc_mp; - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_ino.ip; struct xfs_btree_block *block; struct xfs_buf *bp; struct xfs_bmbt_rec *frp; xfs_extnum_t num_recs; xfs_extnum_t j; - int whichfork = cur->bc_private.b.whichfork; + int whichfork = cur->bc_ino.whichfork; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); block = xfs_btree_get_block(cur, level, &bp); /* Abort if we find more records than nextents. 
*/ num_recs = xfs_btree_get_numrecs(block); - if (unlikely(ir->loaded + num_recs > - XFS_IFORK_NEXTENTS(ip, whichfork))) { + if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) { xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).", (unsigned long long)ip->i_ino); xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block, @@ -1217,7 +1209,7 @@ xfs_iread_bmbt_block( xfs_bmap_fork_to_state(whichfork)); trace_xfs_read_extent(ip, &ir->icur, xfs_bmap_fork_to_state(whichfork), _THIS_IP_); - xfs_iext_next(XFS_IFORK_PTR(ip, whichfork), &ir->icur); + xfs_iext_next(ifp, &ir->icur); } return 0; @@ -1240,9 +1232,7 @@ xfs_iread_extents( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_CORRUPT(mp, - XFS_IFORK_FORMAT(ip, whichfork) != - XFS_DINODE_FMT_BTREE)) { + if (XFS_IS_CORRUPT(mp, ifp->if_format != XFS_DINODE_FMT_BTREE)) { error = -EFSCORRUPTED; goto out; } @@ -1256,8 +1246,7 @@ xfs_iread_extents( if (error) goto out; - if (XFS_IS_CORRUPT(mp, - ir.loaded != XFS_IFORK_NEXTENTS(ip, whichfork))) { + if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) { error = -EFSCORRUPTED; goto out; } @@ -1291,14 +1280,13 @@ xfs_bmap_first_unused( xfs_fileoff_t lowest, max; int error; - ASSERT(xfs_ifork_has_extents(ip, whichfork) || - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); - - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) { *first_unused = 0; return 0; } + ASSERT(xfs_ifork_has_extents(ifp)); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(tp, ip, whichfork); if (error) @@ -1339,7 +1327,7 @@ xfs_bmap_last_before( struct xfs_iext_cursor icur; int error; - switch (XFS_IFORK_FORMAT(ip, whichfork)) { + switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: *last_block = 0; return 0; @@ -1438,16 +1426,17 @@ xfs_bmap_last_offset( xfs_fileoff_t *last_block, int whichfork) { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_bmbt_irec rec; int is_empty; int 
error; *last_block = 0; - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) return 0; - if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ip, whichfork))) + if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) return -EFSCORRUPTED; error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); @@ -1465,23 +1454,22 @@ xfs_bmap_last_offset( */ int /* 1=>1 block, 0=>otherwise */ xfs_bmap_one_block( - xfs_inode_t *ip, /* incore inode */ - int whichfork) /* data or attr fork */ + struct xfs_inode *ip, /* incore inode */ + int whichfork) /* data or attr fork */ { - struct xfs_ifork *ifp; /* inode fork pointer */ - int rval; /* return value */ - xfs_bmbt_irec_t s; /* internal version of extent */ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + int rval; /* return value */ + struct xfs_bmbt_irec s; /* internal version of extent */ struct xfs_iext_cursor icur; #ifndef DEBUG if (whichfork == XFS_DATA_FORK) return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; #endif /* !DEBUG */ - if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) + if (ifp->if_nextents != 1) return 0; - if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) return 0; - ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); xfs_iext_first(ifp, &icur); xfs_iext_get_extent(ifp, &icur, &s); @@ -1503,10 +1491,11 @@ xfs_bmap_add_extent_delay_real( struct xfs_bmalloca *bma, int whichfork) { + struct xfs_mount *mp = bma->ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); struct xfs_bmbt_irec *new = &bma->got; int error; /* error return value */ int i; /* temp state */ - struct xfs_ifork *ifp; /* inode fork pointer */ xfs_fileoff_t new_endoff; /* end offset of new entry */ xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ /* left is 0, right is 1, prev is 2 */ @@ -1516,19 +1505,12 @@ xfs_bmap_add_extent_delay_real( xfs_filblks_t da_old; 
/* old count del alloc blocks used */ xfs_filblks_t temp=0; /* value for da_new calculations */ int tmp_rval; /* partial logging flags */ - struct xfs_mount *mp; - xfs_extnum_t *nextents; struct xfs_bmbt_irec old; - mp = bma->ip->i_mount; - ifp = XFS_IFORK_PTR(bma->ip, whichfork); ASSERT(whichfork != XFS_ATTR_FORK); - nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents : - &bma->ip->i_d.di_nextents); - ASSERT(!isnullstartblock(new->br_startblock)); ASSERT(!bma->cur || - (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -1616,7 +1598,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_remove(bma->ip, &bma->icur, state); xfs_iext_prev(ifp, &bma->icur); xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT); - (*nextents)--; + ifp->if_nextents--; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1720,8 +1702,8 @@ xfs_bmap_add_extent_delay_real( PREV.br_startblock = new->br_startblock; PREV.br_state = new->br_state; xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); + ifp->if_nextents++; - (*nextents)++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -1786,7 +1768,8 @@ xfs_bmap_add_extent_delay_real( * The left neighbor is not contiguous. */ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - (*nextents)++; + ifp->if_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -1818,7 +1801,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + (bma->cur ? bma->cur->bc_ino.allocated : 0)); PREV.br_startoff = new_endoff; PREV.br_blockcount = temp; @@ -1872,7 +1855,8 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. 
*/ xfs_iext_update_extent(bma->ip, state, &bma->icur, new); - (*nextents)++; + ifp->if_nextents++; + if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -1904,7 +1888,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + (bma->cur ? bma->cur->bc_ino.allocated : 0)); PREV.br_startblock = nullstartblock(da_new); PREV.br_blockcount = temp; @@ -1957,7 +1941,7 @@ xfs_bmap_add_extent_delay_real( xfs_iext_next(ifp, &bma->icur); xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state); xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state); - (*nextents)++; + ifp->if_nextents++; if (bma->cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -2025,8 +2009,8 @@ xfs_bmap_add_extent_delay_real( xfs_mod_delalloc(mp, (int64_t)da_new - da_old); if (bma->cur) { - da_new += bma->cur->bc_private.b.allocated; - bma->cur->bc_private.b.allocated = 0; + da_new += bma->cur->bc_ino.allocated; + bma->cur->bc_ino.allocated = 0; } /* adjust for changes in reserved delayed indirect blocks */ @@ -2161,8 +2145,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 2); + ifp->if_nextents -= 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2214,8 +2197,7 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &LEFT); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + ifp->if_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2257,9 +2239,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &PREV); 
+ ifp->if_nextents--; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2366,8 +2347,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_update_extent(ip, state, icur, &PREV); xfs_iext_insert(ip, icur, new, state); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + ifp->if_nextents++; + if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2442,9 +2423,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_update_extent(ip, state, icur, &PREV); xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, new, state); + ifp->if_nextents++; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2495,9 +2475,8 @@ xfs_bmap_add_extent_unwritten_real( xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, &r[1], state); xfs_iext_insert(ip, icur, &r[0], state); + ifp->if_nextents += 2; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 2); if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; else { @@ -2573,7 +2552,7 @@ xfs_bmap_add_extent_unwritten_real( /* clear out the allocated field, done with it now in any case. 
*/ if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; *curp = cur; } @@ -2752,7 +2731,7 @@ xfs_bmap_add_extent_hole_real( struct xfs_bmbt_irec old; ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); + ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2812,9 +2791,8 @@ xfs_bmap_add_extent_hole_real( xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, state, icur, &left); + ifp->if_nextents--; - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { @@ -2912,8 +2890,8 @@ xfs_bmap_add_extent_hole_real( * Insert a new entry. */ xfs_iext_insert(ip, icur, new, state); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + ifp->if_nextents++; + if (cur == NULL) { rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); } else { @@ -2955,7 +2933,7 @@ xfs_bmap_add_extent_hole_real( /* clear out the allocated field, done with it now in any case. 
*/ if (cur) - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_bmap_check_leaf_extents(cur, ip, whichfork); done: @@ -3893,7 +3871,8 @@ xfs_bmapi_read( int flags) { struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; + int whichfork = xfs_bmapi_whichfork(flags); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_bmbt_irec got; xfs_fileoff_t obno; xfs_fileoff_t end; @@ -3901,48 +3880,23 @@ xfs_bmapi_read( int error; bool eof = false; int n = 0; - int whichfork = xfs_bmapi_whichfork(flags); ASSERT(*nmap >= 1); - ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| - XFS_BMAPI_COWFORK))); + ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE))); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || - XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + if (WARN_ON_ONCE(!ifp)) + return -EFSCORRUPTED; + + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) return -EFSCORRUPTED; - } if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; XFS_STATS_INC(mp, xs_blk_mapr); - ifp = XFS_IFORK_PTR(ip, whichfork); - if (!ifp) { - /* No CoW fork? Return a hole. */ - if (whichfork == XFS_COW_FORK) { - mval->br_startoff = bno; - mval->br_startblock = HOLESTARTBLOCK; - mval->br_blockcount = len; - mval->br_state = XFS_EXT_NORM; - *nmap = 1; - return 0; - } - - /* - * A missing attr ifork implies that the inode says we're in - * extents or btree format but failed to pass the inode fork - * verifier while trying to load it. Treat that as a file - * corruption too. 
- */ -#ifdef DEBUG - xfs_alert(mp, "%s: inode %llu missing fork %d", - __func__, ip->i_ino, whichfork); -#endif /* DEBUG */ - return -EFSCORRUPTED; - } - if (!(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(NULL, ip, whichfork); if (error) @@ -4187,25 +4141,15 @@ xfs_bmapi_allocate( bma->nallocs++; if (bma->cur) - bma->cur->bc_private.b.flags = - bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; + bma->cur->bc_ino.flags = + bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; bma->got.br_startoff = bma->offset; bma->got.br_startblock = bma->blkno; bma->got.br_blockcount = bma->length; bma->got.br_state = XFS_EXT_NORM; - /* - * In the data fork, a wasdelay extent has been initialized, so - * shouldn't be flagged as unwritten. - * - * For the cow fork, however, we convert delalloc reservations - * (extents allocated for speculative preallocation) to - * allocated unwritten extents, and only convert the unwritten - * extents to real extents when we're about to write the data. - */ - if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && - (bma->flags & XFS_BMAPI_PREALLOC)) + if (bma->flags & XFS_BMAPI_PREALLOC) bma->got.br_state = XFS_EXT_UNWRITTEN; if (bma->wasdel) @@ -4319,11 +4263,13 @@ xfs_bmapi_minleft( struct xfs_inode *ip, int fork) { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, fork); + if (tp && tp->t_firstblock != NULLFSBLOCK) return 0; - if (XFS_IFORK_FORMAT(ip, fork) != XFS_DINODE_FMT_BTREE) + if (ifp->if_format != XFS_DINODE_FMT_BTREE) return 1; - return be16_to_cpu(XFS_IFORK_PTR(ip, fork)->if_broot->bb_level) + 1; + return be16_to_cpu(ifp->if_broot->bb_level) + 1; } /* @@ -4338,11 +4284,13 @@ xfs_bmapi_finish( int whichfork, int error) { + struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); + if ((bma->logflags & xfs_ilog_fext(whichfork)) && - XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + ifp->if_format != XFS_DINODE_FMT_EXTENTS) bma->logflags &= ~xfs_ilog_fext(whichfork); else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) && - 
XFS_IFORK_FORMAT(bma->ip, whichfork) != XFS_DINODE_FMT_BTREE) + ifp->if_format != XFS_DINODE_FMT_BTREE) bma->logflags &= ~xfs_ilog_fbroot(whichfork); if (bma->logflags) @@ -4374,13 +4322,13 @@ xfs_bmapi_write( .total = total, }; struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; + int whichfork = xfs_bmapi_whichfork(flags); + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); xfs_fileoff_t end; /* end of mapped file region */ bool eof = false; /* after the end of extents */ int error; /* error return */ int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ - int whichfork; /* data or attr fork */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4395,13 +4343,12 @@ xfs_bmapi_write( orig_mval = mval; orig_nmap = *nmap; #endif - whichfork = xfs_bmapi_whichfork(flags); ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(tp != NULL); ASSERT(len > 0); - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(!(flags & XFS_BMAPI_REMAP)); @@ -4417,7 +4364,7 @@ xfs_bmapi_write( ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -4425,8 +4372,6 @@ xfs_bmapi_write( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ifp = XFS_IFORK_PTR(ip, whichfork); - XFS_STATS_INC(mp, xs_blk_mapw); if (!(ifp->if_flags & XFS_IFEXTENTS)) { @@ -4536,9 +4481,8 @@ xfs_bmapi_write( if (error) goto error0; - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || - XFS_IFORK_NEXTENTS(ip, whichfork) > - XFS_IFORK_MAXEXT(ip, whichfork)); + ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE || + ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork)); 
xfs_bmapi_finish(&bma, whichfork, 0); xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, orig_nmap, *nmap); @@ -4613,8 +4557,23 @@ xfs_bmapi_convert_delalloc( bma.offset = bma.got.br_startoff; bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN); bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); + + /* + * When we're converting the delalloc reservations backing dirty pages + * in the page cache, we must be careful about how we create the new + * extents: + * + * New CoW fork extents are created unwritten, turned into real extents + * when we're about to write the data to disk, and mapped into the data + * fork after the write finishes. End of story. + * + * New data fork extents must be mapped in as unwritten and converted + * to real extents after the write succeeds to avoid exposing stale + * disk contents if we crash. + */ + bma.flags = XFS_BMAPI_PREALLOC; if (whichfork == XFS_COW_FORK) - bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC; + bma.flags |= XFS_BMAPI_COWFORK; if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) bma.prev.br_startoff = NULLFILEOFF; @@ -4684,7 +4643,7 @@ xfs_bmapi_remap( ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)); - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -4709,7 +4668,7 @@ xfs_bmapi_remap( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } got.br_startoff = bno; @@ -4728,9 +4687,9 @@ xfs_bmapi_remap( error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork); error0: - if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) + if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS) logflags &= ~XFS_ILOG_DEXT; - else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) + else if 
(ip->i_df.if_format != XFS_DINODE_FMT_BTREE) logflags &= ~XFS_ILOG_DBROOT; if (logflags) @@ -5080,9 +5039,8 @@ xfs_bmap_del_extent_real( * conversion to btree format, since the transaction will be dirty then. */ if (tp->t_blk_res == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= - XFS_IFORK_MAXEXT(ip, whichfork) && + ifp->if_format == XFS_DINODE_FMT_EXTENTS && + ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) && del->br_startoff > got.br_startoff && del_endoff < got_endoff) return -ENOSPC; @@ -5134,8 +5092,8 @@ xfs_bmap_del_extent_real( */ xfs_iext_remove(ip, icur, state); xfs_iext_prev(ifp, icur); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + ifp->if_nextents--; + flags |= XFS_ILOG_CORE; if (!cur) { flags |= xfs_ilog_fext(whichfork); @@ -5243,8 +5201,8 @@ xfs_bmap_del_extent_real( } } else flags |= xfs_ilog_fext(whichfork); - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + + ifp->if_nextents++; xfs_iext_next(ifp, icur); xfs_iext_insert(ip, icur, &new, state); break; @@ -5324,7 +5282,7 @@ __xfs_bunmapi( whichfork = xfs_bmapi_whichfork(flags); ASSERT(whichfork != XFS_COW_FORK); ifp = XFS_IFORK_PTR(ip, whichfork); - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork))) + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) return -EFSCORRUPTED; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -5362,9 +5320,9 @@ __xfs_bunmapi( logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { - ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } else cur = NULL; @@ -5607,10 +5565,10 @@ error0: * logging the extent records if we've converted to btree format. 
*/ if ((logflags & xfs_ilog_fext(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + ifp->if_format != XFS_DINODE_FMT_EXTENTS) logflags &= ~xfs_ilog_fext(whichfork); else if ((logflags & xfs_ilog_fbroot(whichfork)) && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + ifp->if_format != XFS_DINODE_FMT_BTREE) logflags &= ~xfs_ilog_fbroot(whichfork); /* * Log inode even in the error case, if the transaction @@ -5620,7 +5578,7 @@ error0: xfs_trans_log_inode(tp, ip, logflags); if (cur) { if (!error) - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -5692,6 +5650,7 @@ xfs_bmse_merge( struct xfs_btree_cur *cur, int *logflags) /* output */ { + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_bmbt_irec new; xfs_filblks_t blockcount; int error, i; @@ -5710,8 +5669,7 @@ xfs_bmse_merge( * Update the on-disk extent count, the btree if necessary and log the * inode. */ - XFS_IFORK_NEXT_SET(ip, whichfork, - XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + ifp->if_nextents--; *logflags |= XFS_ILOG_CORE; if (!cur) { *logflags |= XFS_ILOG_DEXT; @@ -5749,7 +5707,7 @@ xfs_bmse_merge( done: xfs_iext_remove(ip, icur, 0); - xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur); + xfs_iext_prev(ifp, icur); xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, &new); @@ -5821,7 +5779,7 @@ xfs_bmap_collapse_extents( int error = 0; int logflags = 0; - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5839,7 +5797,7 @@ xfs_bmap_collapse_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) { @@ -5938,7 +5896,7 @@ xfs_bmap_insert_extents( int error = 0; int logflags 
= 0; - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -5956,7 +5914,7 @@ xfs_bmap_insert_extents( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; } if (*next_fsb == NULLFSBLOCK) { @@ -6025,25 +5983,25 @@ del_cursor: * @split_fsb is a block where the extents is split. If split_fsb lies in a * hole or the first block of extents, just return 0. */ -STATIC int -xfs_bmap_split_extent_at( +int +xfs_bmap_split_extent( struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t split_fsb) { int whichfork = XFS_DATA_FORK; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_irec got; struct xfs_bmbt_irec new; /* split extent */ struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp; xfs_fsblock_t gotblkcnt; /* new block count for got */ struct xfs_iext_cursor icur; int error = 0; int logflags = 0; int i = 0; - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) || + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { return -EFSCORRUPTED; } @@ -6051,7 +6009,6 @@ xfs_bmap_split_extent_at( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - ifp = XFS_IFORK_PTR(ip, whichfork); if (!(ifp->if_flags & XFS_IFEXTENTS)) { /* Read in all the extents */ error = xfs_iread_extents(tp, ip, whichfork); @@ -6074,7 +6031,7 @@ xfs_bmap_split_extent_at( if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_private.b.flags = 0; + cur->bc_ino.flags = 0; error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto del_cursor; @@ -6099,8 +6056,7 @@ xfs_bmap_split_extent_at( /* Add new extent */ xfs_iext_next(ifp, &icur); xfs_iext_insert(ip, &icur, &new, 0); - XFS_IFORK_NEXT_SET(ip, whichfork, - 
XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + ifp->if_nextents++; if (cur) { error = xfs_bmbt_lookup_eq(cur, &new, &i); @@ -6133,7 +6089,7 @@ xfs_bmap_split_extent_at( del_cursor: if (cur) { - cur->bc_private.b.allocated = 0; + cur->bc_ino.allocated = 0; xfs_btree_del_cursor(cur, error); } @@ -6142,34 +6098,6 @@ del_cursor: return error; } -int -xfs_bmap_split_extent( - struct xfs_inode *ip, - xfs_fileoff_t split_fsb) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, - XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); - if (error) - return error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - error = xfs_bmap_split_extent_at(tp, ip, split_fsb); - if (error) - goto out; - - return xfs_trans_commit(tp); - -out: - xfs_trans_cancel(tp); - return error; -} - /* Deferred mapping is only for real extents in the data fork. */ static bool xfs_bmap_is_update_needed( diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 14d25e0b7d9c..e1bd484e5548 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. @@ -158,17 +158,22 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags) { BMAP_ATTRFORK, "ATTR" }, \ { BMAP_COWFORK, "COW" } +/* Return true if the extent is an allocated extent, written or not. */ +static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) +{ + return irec->br_startblock != HOLESTARTBLOCK && + irec->br_startblock != DELAYSTARTBLOCK && + !isnullstartblock(irec->br_startblock); +} /* * Return true if the extent is a real, allocated extent, or false if it is a * delayed allocation, and unwritten extent or a hole. 
*/ -static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec) +static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec) { - return irec->br_state != XFS_EXT_UNWRITTEN && - irec->br_startblock != HOLESTARTBLOCK && - irec->br_startblock != DELAYSTARTBLOCK && - !isnullstartblock(irec->br_startblock); + return xfs_bmap_is_real_extent(irec) && + irec->br_state != XFS_EXT_UNWRITTEN; } /* @@ -222,7 +227,8 @@ int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fileoff_t stop_fsb); -int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); +int xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t split_offset); int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index ffe608d2a2d9..ecec604e6e4d 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -166,13 +166,13 @@ xfs_bmbt_dup_cursor( struct xfs_btree_cur *new; new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.b.ip, cur->bc_private.b.whichfork); + cur->bc_ino.ip, cur->bc_ino.whichfork); /* * Copy the firstblock, dfops, and flags values, * since init cursor doesn't get them. 
*/ - new->bc_private.b.flags = cur->bc_private.b.flags; + new->bc_ino.flags = cur->bc_ino.flags; return new; } @@ -183,12 +183,12 @@ xfs_bmbt_update_cursor( struct xfs_btree_cur *dst) { ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) || - (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); + (dst->bc_ino.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); - dst->bc_private.b.allocated += src->bc_private.b.allocated; + dst->bc_ino.allocated += src->bc_ino.allocated; dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock; - src->bc_private.b.allocated = 0; + src->bc_ino.allocated = 0; } STATIC int @@ -205,8 +205,8 @@ xfs_bmbt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.fsbno = cur->bc_tp->t_firstblock; - xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino, - cur->bc_private.b.whichfork); + xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork); if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); @@ -230,7 +230,7 @@ xfs_bmbt_alloc_block( } args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL; if (!args.wasdel && args.tp->t_blk_res == 0) { error = -ENOSPC; goto error0; @@ -259,10 +259,10 @@ xfs_bmbt_alloc_block( ASSERT(args.len == 1); cur->bc_tp->t_firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip, + cur->bc_ino.allocated++; + cur->bc_ino.ip->i_d.di_nblocks++; + xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); + xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip, XFS_TRANS_DQ_BCOUNT, 1L); new->l = cpu_to_be64(args.fsbno); @@ -280,12 +280,12 @@ xfs_bmbt_free_block( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = 
cur->bc_ino.ip; struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); struct xfs_owner_info oinfo; - xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); ip->i_d.di_nblocks--; @@ -302,8 +302,8 @@ xfs_bmbt_get_minrecs( if (level == cur->bc_nlevels - 1) { struct xfs_ifork *ifp; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + ifp = XFS_IFORK_PTR(cur->bc_ino.ip, + cur->bc_ino.whichfork); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0) / 2; @@ -320,8 +320,8 @@ xfs_bmbt_get_maxrecs( if (level == cur->bc_nlevels - 1) { struct xfs_ifork *ifp; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + ifp = XFS_IFORK_PTR(cur->bc_ino.ip, + cur->bc_ino.whichfork); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0); @@ -347,7 +347,7 @@ xfs_bmbt_get_dmaxrecs( { if (level != cur->bc_nlevels - 1) return cur->bc_mp->m_bmap_dmxr[level != 0]; - return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0); + return xfs_bmdr_maxrecs(cur->bc_ino.forksize, level == 0); } STATIC void @@ -552,7 +552,7 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; @@ -566,11 +566,11 @@ xfs_bmbt_init_cursor( if (xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); - cur->bc_private.b.ip = ip; - cur->bc_private.b.allocated = 0; - cur->bc_private.b.flags = 0; - cur->bc_private.b.whichfork = whichfork; + cur->bc_ino.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_ino.ip = ip; + cur->bc_ino.allocated = 0; + cur->bc_ino.flags = 0; + cur->bc_ino.whichfork = 
whichfork; return cur; } @@ -636,15 +636,12 @@ xfs_bmbt_change_owner( ASSERT(tp || buffer_list); ASSERT(!(tp && buffer_list)); - if (whichfork == XFS_DATA_FORK) - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE); - else - ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE); + ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); if (!cur) return -ENOMEM; - cur->bc_private.b.flags |= XFS_BTCUR_BPRV_INVALID_OWNER; + cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 29b407d053b4..72bf74c79fb9 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002-2005 Silicon Graphics, Inc. * All Rights Reserved. diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index fd300dc93ca4..2d25bab68764 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -20,6 +20,7 @@ #include "xfs_trace.h" #include "xfs_alloc.h" #include "xfs_log.h" +#include "xfs_btree_staging.h" /* * Cursor allocation zone. 
@@ -214,7 +215,7 @@ xfs_btree_check_sptr( { if (level <= 0) return false; - return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); + return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.agno, agbno); } /* @@ -234,8 +235,8 @@ xfs_btree_check_ptr( return 0; xfs_err(cur->bc_mp, "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", - cur->bc_private.b.ip->i_ino, - cur->bc_private.b.whichfork, cur->bc_btnum, + cur->bc_ino.ip->i_ino, + cur->bc_ino.whichfork, cur->bc_btnum, level, index); } else { if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), @@ -243,7 +244,7 @@ xfs_btree_check_ptr( return 0; xfs_err(cur->bc_mp, "AG %u: Corrupt btree %d pointer at level %d index %d.", - cur->bc_private.a.agno, cur->bc_btnum, + cur->bc_ag.agno, cur->bc_btnum, level, index); } @@ -378,10 +379,12 @@ xfs_btree_del_cursor( * allocated indirect blocks' accounting. */ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || - cur->bc_private.b.allocated == 0); + cur->bc_ino.allocated == 0); /* * Free the cursor. */ + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) + kmem_free((void *)cur->bc_ops); kmem_cache_free(xfs_btree_cur_zone, cur); } @@ -642,6 +645,17 @@ xfs_btree_ptr_addr( ((char *)block + xfs_btree_ptr_offset(cur, n, level)); } +struct xfs_ifork * +xfs_btree_ifork_ptr( + struct xfs_btree_cur *cur) +{ + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + + if (cur->bc_flags & XFS_BTREE_STAGING) + return cur->bc_ino.ifake->if_fork; + return XFS_IFORK_PTR(cur->bc_ino.ip, cur->bc_ino.whichfork); +} + /* * Get the root block which is stored in the inode. 
* @@ -652,9 +666,8 @@ STATIC struct xfs_btree_block * xfs_btree_get_iroot( struct xfs_btree_cur *cur) { - struct xfs_ifork *ifp; + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); return (struct xfs_btree_block *)ifp->if_broot; } @@ -881,13 +894,13 @@ xfs_btree_readahead_sblock( if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, left, 1, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.agno, right, 1, cur->bc_ops->buf_ops); rval++; } @@ -945,7 +958,7 @@ xfs_btree_ptr_to_daddr( *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); } else { agbno = be32_to_cpu(ptr->s); - *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.agno, agbno); } @@ -1014,7 +1027,7 @@ xfs_btree_ptr_is_null( return ptr->s == cpu_to_be32(NULLAGBLOCK); } -STATIC void +void xfs_btree_set_ptr_null( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) @@ -1050,7 +1063,7 @@ xfs_btree_get_sibling( } } -STATIC void +void xfs_btree_set_sibling( struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -1128,7 +1141,7 @@ xfs_btree_init_block( btnum, level, numrecs, owner, 0); } -STATIC void +void xfs_btree_init_block_cur( struct xfs_btree_cur *cur, struct xfs_buf *bp, @@ -1144,9 +1157,9 @@ xfs_btree_init_block_cur( * code. 
*/ if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - owner = cur->bc_private.b.ip->i_ino; + owner = cur->bc_ino.ip->i_ino; else - owner = cur->bc_private.a.agno; + owner = cur->bc_ag.agno; xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, cur->bc_btnum, level, numrecs, @@ -1220,7 +1233,7 @@ xfs_btree_set_refs( } } -STATIC int +int xfs_btree_get_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, @@ -1280,7 +1293,7 @@ xfs_btree_read_buf_block( /* * Copy keys from one btree block to another. */ -STATIC void +void xfs_btree_copy_keys( struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, @@ -1308,11 +1321,11 @@ xfs_btree_copy_recs( /* * Copy block pointers from one btree block to another. */ -STATIC void +void xfs_btree_copy_ptrs( struct xfs_btree_cur *cur, union xfs_btree_ptr *dst_ptr, - union xfs_btree_ptr *src_ptr, + const union xfs_btree_ptr *src_ptr, int numptrs) { ASSERT(numptrs >= 0); @@ -1393,8 +1406,8 @@ xfs_btree_log_keys( xfs_btree_key_offset(cur, first), xfs_btree_key_offset(cur, last + 1) - 1); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1436,8 +1449,8 @@ xfs_btree_log_ptrs( xfs_btree_ptr_offset(cur, first, level), xfs_btree_ptr_offset(cur, last + 1, level) - 1); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1505,8 +1518,8 @@ xfs_btree_log_block( xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, first, last); } else { - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip, + xfs_ilog_fbroot(cur->bc_ino.whichfork)); } } @@ -1743,10 +1756,10 @@ 
xfs_btree_lookup_get_block( /* Check the inode owner since the verifiers don't. */ if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && - !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && + !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) && (cur->bc_flags & XFS_BTREE_LONG_PTRS) && be64_to_cpu((*blkp)->bb_u.l.bb_owner) != - cur->bc_private.b.ip->i_ino) + cur->bc_ino.ip->i_ino) goto out_bad; /* Did we get the level we were looking for? */ @@ -1762,7 +1775,7 @@ xfs_btree_lookup_get_block( out_bad: *blkp = NULL; - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); xfs_trans_brelse(cur->bc_tp, bp); return -EFSCORRUPTED; } @@ -2938,9 +2951,9 @@ xfs_btree_new_iroot( xfs_btree_copy_ptrs(cur, pp, &nptr, 1); - xfs_iroot_realloc(cur->bc_private.b.ip, + xfs_iroot_realloc(cur->bc_ino.ip, 1 - xfs_btree_get_numrecs(cblock), - cur->bc_private.b.whichfork); + cur->bc_ino.whichfork); xfs_btree_setbuf(cur, level, cbp); @@ -2953,7 +2966,7 @@ xfs_btree_new_iroot( xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); *logflags |= - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork); *stat = 1; return 0; error0: @@ -3105,11 +3118,11 @@ xfs_btree_make_block_unfull( if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && level == cur->bc_nlevels - 1) { - struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_inode *ip = cur->bc_ino.ip; if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { /* A root block that can be made bigger. 
*/ - xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); + xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork); *stat = 1; } else { /* A root block that needs replacing */ @@ -3455,8 +3468,8 @@ STATIC int xfs_btree_kill_iroot( struct xfs_btree_cur *cur) { - int whichfork = cur->bc_private.b.whichfork; - struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_ino.whichfork; + struct xfs_inode *ip = cur->bc_ino.ip; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; struct xfs_btree_block *cblock; @@ -3514,8 +3527,8 @@ xfs_btree_kill_iroot( index = numrecs - cur->bc_ops->get_maxrecs(cur, level); if (index) { - xfs_iroot_realloc(cur->bc_private.b.ip, index, - cur->bc_private.b.whichfork); + xfs_iroot_realloc(cur->bc_ino.ip, index, + cur->bc_ino.whichfork); block = ifp->if_broot; } @@ -3544,7 +3557,7 @@ xfs_btree_kill_iroot( cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, - XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); cur->bc_nlevels--; out0: return 0; @@ -3712,8 +3725,8 @@ xfs_btree_delrec( */ if (level == cur->bc_nlevels - 1) { if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { - xfs_iroot_realloc(cur->bc_private.b.ip, -1, - cur->bc_private.b.whichfork); + xfs_iroot_realloc(cur->bc_ino.ip, -1, + cur->bc_ino.whichfork); error = xfs_btree_kill_iroot(cur); if (error) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3eff7c321d43..10e50cbacacf 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. 
@@ -10,6 +10,7 @@ struct xfs_buf; struct xfs_inode; struct xfs_mount; struct xfs_trans; +struct xfs_ifork; extern kmem_zone_t *xfs_btree_cur_zone; @@ -177,15 +178,37 @@ union xfs_btree_irec { struct xfs_refcount_irec rc; }; -/* Per-AG btree private information. */ -union xfs_btree_cur_private { - struct { - unsigned long nr_ops; /* # record updates */ - int shape_changes; /* # of extent splits */ - } refc; - struct { - bool active; /* allocation cursor state */ - } abt; +/* Per-AG btree information. */ +struct xfs_btree_cur_ag { + union { + struct xfs_buf *agbp; + struct xbtree_afakeroot *afake; /* for staging cursor */ + }; + xfs_agnumber_t agno; + union { + struct { + unsigned long nr_ops; /* # record updates */ + int shape_changes; /* # of extent splits */ + } refc; + struct { + bool active; /* allocation cursor state */ + } abt; + }; +}; + +/* Btree-in-inode cursor information */ +struct xfs_btree_cur_ino { + struct xfs_inode *ip; + struct xbtree_ifakeroot *ifake; /* for staging cursor */ + int allocated; + short forksize; + char whichfork; + char flags; +/* We are converting a delalloc reservation */ +#define XFS_BTCUR_BMBT_WASDEL (1 << 0) + +/* For extent swap, ignore owner check in verifier */ +#define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) }; /* @@ -209,21 +232,9 @@ typedef struct xfs_btree_cur xfs_btnum_t bc_btnum; /* identifies which btree type */ int bc_statoff; /* offset of btre stats array */ union { - struct { /* needed for BNO, CNT, INO */ - struct xfs_buf *agbp; /* agf/agi buffer pointer */ - xfs_agnumber_t agno; /* ag number */ - union xfs_btree_cur_private priv; - } a; - struct { /* needed for BMAP */ - struct xfs_inode *ip; /* pointer to our inode */ - int allocated; /* count of alloced */ - short forksize; /* fork's inode space */ - char whichfork; /* data or attr fork */ - char flags; /* flags */ -#define XFS_BTCUR_BPRV_WASDEL (1<<0) /* was delayed */ -#define XFS_BTCUR_BPRV_INVALID_OWNER (1<<1) /* for ext swap */ - } b; - } bc_private; /* 
per-btree type data */ + struct xfs_btree_cur_ag bc_ag; + struct xfs_btree_cur_ino bc_ino; + }; } xfs_btree_cur_t; /* cursor flags */ @@ -232,6 +243,12 @@ typedef struct xfs_btree_cur #define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ #define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */ #define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */ +/* + * The root of this btree is a fakeroot structure so that we can stage a btree + * rebuild without leaving it accessible via primary metadata. The ops struct + * is dynamically allocated and must be freed when the cursor is deleted. + */ +#define XFS_BTREE_STAGING (1<<5) #define XFS_BTREE_NOERROR 0 @@ -494,6 +511,7 @@ union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur, int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low, union xfs_btree_irec *high, bool *exists); bool xfs_btree_has_more_records(struct xfs_btree_cur *cur); +struct xfs_ifork *xfs_btree_ifork_ptr(struct xfs_btree_cur *cur); /* Does this cursor point to the last block in the given level? 
*/ static inline bool @@ -512,4 +530,20 @@ xfs_btree_islastblock( return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } +void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); +int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, + struct xfs_btree_block **block, struct xfs_buf **bpp); +void xfs_btree_set_sibling(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, union xfs_btree_ptr *ptr, + int lr); +void xfs_btree_init_block_cur(struct xfs_btree_cur *cur, + struct xfs_buf *bp, int level, int numrecs); +void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur, + union xfs_btree_ptr *dst_ptr, + const union xfs_btree_ptr *src_ptr, int numptrs); +void xfs_btree_copy_keys(struct xfs_btree_cur *cur, + union xfs_btree_key *dst_key, union xfs_btree_key *src_key, + int numkeys); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c new file mode 100644 index 000000000000..f464a7c7cf22 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -0,0 +1,879 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_btree.h" +#include "xfs_trace.h" +#include "xfs_btree_staging.h" + +/* + * Staging Cursors and Fake Roots for Btrees + * ========================================= + * + * A staging btree cursor is a special type of btree cursor that callers must + * use to construct a new btree index using the btree bulk loader code. The + * bulk loading code uses the staging btree cursor to abstract the details of + * initializing new btree blocks and filling them with records or key/ptr + * pairs. 
Regular btree operations (e.g. queries and modifications) are not + * supported with staging cursors, and callers must not invoke them. + * + * Fake root structures contain all the information about a btree that is under + * construction by the bulk loading code. Staging btree cursors point to fake + * root structures instead of the usual AG header or inode structure. + * + * Callers are expected to initialize a fake root structure and pass it into + * the _stage_cursor function for a specific btree type. When bulk loading is + * complete, callers should call the _commit_staged_btree function for that + * specific btree type to commit the new btree into the filesystem. + */ + +/* + * Don't allow staging cursors to be duplicated because they're supposed to be + * kept private to a single thread. + */ +STATIC struct xfs_btree_cur * +xfs_btree_fakeroot_dup_cursor( + struct xfs_btree_cur *cur) +{ + ASSERT(0); + return NULL; +} + +/* + * Don't allow block allocation for a staging cursor, because staging cursors + * do not support regular btree modifications. + * + * Bulk loading uses a separate callback to obtain new blocks from a + * preallocated list, which prevents ENOSPC failures during loading. + */ +STATIC int +xfs_btree_fakeroot_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start_bno, + union xfs_btree_ptr *new_bno, + int *stat) +{ + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Don't allow block freeing for a staging cursor, because staging cursors + * do not support regular btree modifications. + */ +STATIC int +xfs_btree_fakeroot_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + ASSERT(0); + return -EFSCORRUPTED; +} + +/* Initialize a pointer to the root block from the fakeroot. 
*/ +STATIC void +xfs_btree_fakeroot_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xbtree_afakeroot *afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + afake = cur->bc_ag.afake; + ptr->s = cpu_to_be32(afake->af_root); +} + +/* + * Bulk Loading for AG Btrees + * ========================== + * + * For a btree rooted in an AG header, pass a xbtree_afakeroot structure to the + * staging cursor. Callers should initialize this to zero. + * + * The _stage_cursor() function for a specific btree type should call + * xfs_btree_stage_afakeroot to set up the in-memory cursor as a staging + * cursor. The corresponding _commit_staged_btree() function should log the + * new root and call xfs_btree_commit_afakeroot() to transform the staging + * cursor into a regular btree cursor. + */ + +/* Update the btree root information for a per-AG fake root. */ +STATIC void +xfs_btree_afakeroot_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + afake->af_root = be32_to_cpu(ptr->s); + afake->af_levels += inc; +} + +/* + * Initialize an AG-rooted btree cursor with the given AG btree fake root. + * The btree cursor's bc_ops will be overridden as needed to make the staging + * functionality work. 
+ */ +void +xfs_btree_stage_afakeroot( + struct xfs_btree_cur *cur, + struct xbtree_afakeroot *afake) +{ + struct xfs_btree_ops *nops; + + ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); + ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)); + ASSERT(cur->bc_tp == NULL); + + nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); + memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); + nops->alloc_block = xfs_btree_fakeroot_alloc_block; + nops->free_block = xfs_btree_fakeroot_free_block; + nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; + nops->set_root = xfs_btree_afakeroot_set_root; + nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; + + cur->bc_ag.afake = afake; + cur->bc_nlevels = afake->af_levels; + cur->bc_ops = nops; + cur->bc_flags |= XFS_BTREE_STAGING; +} + +/* + * Transform an AG-rooted staging btree cursor back into a regular cursor by + * substituting a real btree root for the fake one and restoring normal btree + * cursor ops. The caller must log the btree root change prior to calling + * this. + */ +void +xfs_btree_commit_afakeroot( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp, + const struct xfs_btree_ops *ops) +{ + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(cur->bc_tp == NULL); + + trace_xfs_btree_commit_afakeroot(cur); + + kmem_free((void *)cur->bc_ops); + cur->bc_ag.agbp = agbp; + cur->bc_ops = ops; + cur->bc_flags &= ~XFS_BTREE_STAGING; + cur->bc_tp = tp; +} + +/* + * Bulk Loading for Inode-Rooted Btrees + * ==================================== + * + * For a btree rooted in an inode fork, pass a xbtree_ifakeroot structure to + * the staging cursor. This structure should be initialized as follows: + * + * - if_fork_size field should be set to the number of bytes available to the + * fork in the inode. + * + * - if_fork should point to a freshly allocated struct xfs_ifork. + * + * - if_format should be set to the appropriate fork type (e.g. + * XFS_DINODE_FMT_BTREE). 
+ * + * All other fields must be zero. + * + * The _stage_cursor() function for a specific btree type should call + * xfs_btree_stage_ifakeroot to set up the in-memory cursor as a staging + * cursor. The corresponding _commit_staged_btree() function should log the + * new root and call xfs_btree_commit_ifakeroot() to transform the staging + * cursor into a regular btree cursor. + */ + +/* + * Initialize an inode-rooted btree cursor with the given inode btree fake + * root. The btree cursor's bc_ops will be overridden as needed to make the + * staging functionality work. If new_ops is not NULL, these new ops will be + * passed out to the caller for further overriding. + */ +void +xfs_btree_stage_ifakeroot( + struct xfs_btree_cur *cur, + struct xbtree_ifakeroot *ifake, + struct xfs_btree_ops **new_ops) +{ + struct xfs_btree_ops *nops; + + ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_tp == NULL); + + nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); + memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); + nops->alloc_block = xfs_btree_fakeroot_alloc_block; + nops->free_block = xfs_btree_fakeroot_free_block; + nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; + nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; + + cur->bc_ino.ifake = ifake; + cur->bc_nlevels = ifake->if_levels; + cur->bc_ops = nops; + cur->bc_flags |= XFS_BTREE_STAGING; + + if (new_ops) + *new_ops = nops; +} + +/* + * Transform an inode-rooted staging btree cursor back into a regular cursor by + * substituting a real btree root for the fake one and restoring normal btree + * cursor ops. The caller must log the btree root change prior to calling + * this. 
+ */ +void +xfs_btree_commit_ifakeroot( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + int whichfork, + const struct xfs_btree_ops *ops) +{ + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(cur->bc_tp == NULL); + + trace_xfs_btree_commit_ifakeroot(cur); + + kmem_free((void *)cur->bc_ops); + cur->bc_ino.ifake = NULL; + cur->bc_ino.whichfork = whichfork; + cur->bc_ops = ops; + cur->bc_flags &= ~XFS_BTREE_STAGING; + cur->bc_tp = tp; +} + +/* + * Bulk Loading of Staged Btrees + * ============================= + * + * This interface is used with a staged btree cursor to create a totally new + * btree with a large number of records (i.e. more than what would fit in a + * single root block). When the creation is complete, the new root can be + * linked atomically into the filesystem by committing the staged cursor. + * + * Creation of a new btree proceeds roughly as follows: + * + * The first step is to initialize an appropriate fake btree root structure and + * then construct a staged btree cursor. Refer to the block comments about + * "Bulk Loading for AG Btrees" and "Bulk Loading for Inode-Rooted Btrees" for + * more information about how to do this. + * + * The second step is to initialize a struct xfs_btree_bload context as + * documented in the structure definition. + * + * The third step is to call xfs_btree_bload_compute_geometry to compute the + * height of and the number of blocks needed to construct the btree. See the + * section "Computing the Geometry of the New Btree" for details about this + * computation. + * + * In step four, the caller must allocate xfs_btree_bload.nr_blocks blocks and + * save them for later use by ->claim_block(). Bulk loading requires all + * blocks to be allocated beforehand to avoid ENOSPC failures midway through a + * rebuild, and to minimize seek distances of the new btree. + * + * Step five is to call xfs_btree_bload() to start constructing the btree. 
+ * + * The final step is to commit the staging btree cursor, which logs the new + * btree root and turns the staging cursor into a regular cursor. The caller + * is responsible for cleaning up the previous btree blocks, if any. + * + * Computing the Geometry of the New Btree + * ======================================= + * + * The number of items placed in each btree block is computed via the following + * algorithm: For leaf levels, the number of items for the level is nr_records + * in the bload structure. For node levels, the number of items for the level + * is the number of blocks in the next lower level of the tree. For each + * level, the desired number of items per block is defined as: + * + * desired = max(minrecs, maxrecs - slack factor) + * + * The number of blocks for the level is defined to be: + * + * blocks = floor(nr_items / desired) + * + * Note this is rounded down so that the npb calculation below will never fall + * below minrecs. The number of items that will actually be loaded into each + * btree block is defined as: + * + * npb = nr_items / blocks + * + * Some of the leftmost blocks in the level will contain one extra record as + * needed to handle uneven division. If the number of records in any block + * would exceed maxrecs for that level, blocks is incremented and npb is + * recalculated. + * + * In other words, we compute the number of blocks needed to satisfy a given + * loading level, then spread the items as evenly as possible. + * + * The height and number of fs blocks required to create the btree are computed + * and returned via btree_height and nr_blocks. + */ + +/* + * Put a btree block that we're loading onto the ordered list and release it. + * The btree blocks will be written to disk when bulk loading is finished. 
+ */ +static void +xfs_btree_bload_drop_buf( + struct list_head *buffers_list, + struct xfs_buf **bpp) +{ + if (*bpp == NULL) + return; + + if (!xfs_buf_delwri_queue(*bpp, buffers_list)) + ASSERT(0); + + xfs_buf_relse(*bpp); + *bpp = NULL; +} + +/* + * Allocate and initialize one btree block for bulk loading. + * + * The new btree block will have its level and numrecs fields set to the values + * of the level and nr_this_block parameters, respectively. + * + * The caller should ensure that ptrp, bpp, and blockp refer to the left + * sibling of the new block, if there is any. On exit, ptrp, bpp, and blockp + * will all point to the new block. + */ +STATIC int +xfs_btree_bload_prep_block( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + struct list_head *buffers_list, + unsigned int level, + unsigned int nr_this_block, + union xfs_btree_ptr *ptrp, /* in/out */ + struct xfs_buf **bpp, /* in/out */ + struct xfs_btree_block **blockp, /* in/out */ + void *priv) +{ + union xfs_btree_ptr new_ptr; + struct xfs_buf *new_bp; + struct xfs_btree_block *new_block; + int ret; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); + size_t new_size; + + ASSERT(*bpp == NULL); + + /* Allocate a new incore btree root block. */ + new_size = bbl->iroot_size(cur, nr_this_block, priv); + ifp->if_broot = kmem_zalloc(new_size, 0); + ifp->if_broot_bytes = (int)new_size; + ifp->if_flags |= XFS_IFBROOT; + + /* Initialize it and send it out. */ + xfs_btree_init_block_int(cur->bc_mp, ifp->if_broot, + XFS_BUF_DADDR_NULL, cur->bc_btnum, level, + nr_this_block, cur->bc_ino.ip->i_ino, + cur->bc_flags); + + *bpp = NULL; + *blockp = ifp->if_broot; + xfs_btree_set_ptr_null(cur, ptrp); + return 0; + } + + /* Claim one of the caller's preallocated blocks. 
*/ + xfs_btree_set_ptr_null(cur, &new_ptr); + ret = bbl->claim_block(cur, &new_ptr, priv); + if (ret) + return ret; + + ASSERT(!xfs_btree_ptr_is_null(cur, &new_ptr)); + + ret = xfs_btree_get_buf_block(cur, &new_ptr, &new_block, &new_bp); + if (ret) + return ret; + + /* + * The previous block (if any) is the left sibling of the new block, + * so set its right sibling pointer to the new block and drop it. + */ + if (*blockp) + xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB); + xfs_btree_bload_drop_buf(buffers_list, bpp); + + /* Initialize the new btree block. */ + xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block); + xfs_btree_set_sibling(cur, new_block, ptrp, XFS_BB_LEFTSIB); + + /* Set the out parameters. */ + *bpp = new_bp; + *blockp = new_block; + xfs_btree_copy_ptrs(cur, ptrp, &new_ptr, 1); + return 0; +} + +/* Load one leaf block. */ +STATIC int +xfs_btree_bload_leaf( + struct xfs_btree_cur *cur, + unsigned int recs_this_block, + xfs_btree_bload_get_record_fn get_record, + struct xfs_btree_block *block, + void *priv) +{ + unsigned int j; + int ret; + + /* Fill the leaf block with records. */ + for (j = 1; j <= recs_this_block; j++) { + union xfs_btree_rec *block_rec; + + ret = get_record(cur, priv); + if (ret) + return ret; + block_rec = xfs_btree_rec_addr(cur, j, block); + cur->bc_ops->init_rec_from_cur(cur, block_rec); + } + + return 0; +} + +/* + * Load one node block with key/ptr pairs. + * + * child_ptr must point to a block within the next level down in the tree. A + * key/ptr entry will be created in the new node block to the block pointed to + * by child_ptr. On exit, child_ptr points to the next block on the child + * level that needs processing. + */ +STATIC int +xfs_btree_bload_node( + struct xfs_btree_cur *cur, + unsigned int recs_this_block, + union xfs_btree_ptr *child_ptr, + struct xfs_btree_block *block) +{ + unsigned int j; + int ret; + + /* Fill the node block with keys and pointers. 
*/ + for (j = 1; j <= recs_this_block; j++) { + union xfs_btree_key child_key; + union xfs_btree_ptr *block_ptr; + union xfs_btree_key *block_key; + struct xfs_btree_block *child_block; + struct xfs_buf *child_bp; + + ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr)); + + ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block, + &child_bp); + if (ret) + return ret; + + block_ptr = xfs_btree_ptr_addr(cur, j, block); + xfs_btree_copy_ptrs(cur, block_ptr, child_ptr, 1); + + block_key = xfs_btree_key_addr(cur, j, block); + xfs_btree_get_keys(cur, child_block, &child_key); + xfs_btree_copy_keys(cur, block_key, &child_key, 1); + + xfs_btree_get_sibling(cur, child_block, child_ptr, + XFS_BB_RIGHTSIB); + xfs_buf_relse(child_bp); + } + + return 0; +} + +/* + * Compute the maximum number of records (or keyptrs) per block that we want to + * install at this level in the btree. Caller is responsible for having set + * @cur->bc_ino.forksize to the desired fork size, if appropriate. + */ +STATIC unsigned int +xfs_btree_bload_max_npb( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level) +{ + unsigned int ret; + + if (level == cur->bc_nlevels - 1 && cur->bc_ops->get_dmaxrecs) + return cur->bc_ops->get_dmaxrecs(cur, level); + + ret = cur->bc_ops->get_maxrecs(cur, level); + if (level == 0) + ret -= bbl->leaf_slack; + else + ret -= bbl->node_slack; + return ret; +} + +/* + * Compute the desired number of records (or keyptrs) per block that we want to + * install at this level in the btree, which must be somewhere between minrecs + * and max_npb. The caller is free to install fewer records per block. + */ +STATIC unsigned int +xfs_btree_bload_desired_npb( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level) +{ + unsigned int npb = xfs_btree_bload_max_npb(cur, bbl, level); + + /* Root blocks are not subject to minrecs rules. 
*/ + if (level == cur->bc_nlevels - 1) + return max(1U, npb); + + return max_t(unsigned int, cur->bc_ops->get_minrecs(cur, level), npb); +} + +/* + * Compute the number of records to be stored in each block at this level and + * the number of blocks for this level. For leaf levels, we must populate an + * empty root block even if there are no records, so we have to have at least + * one block. + */ +STATIC void +xfs_btree_bload_level_geometry( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + unsigned int level, + uint64_t nr_this_level, + unsigned int *avg_per_block, + uint64_t *blocks, + uint64_t *blocks_with_extra) +{ + uint64_t npb; + uint64_t dontcare; + unsigned int desired_npb; + unsigned int maxnr; + + maxnr = cur->bc_ops->get_maxrecs(cur, level); + + /* + * Compute the number of blocks we need to fill each block with the + * desired number of records/keyptrs per block. Because desired_npb + * could be minrecs, we use regular integer division (which rounds + * the block count down) so that in the next step the effective # of + * items per block will never be less than desired_npb. + */ + desired_npb = xfs_btree_bload_desired_npb(cur, bbl, level); + *blocks = div64_u64_rem(nr_this_level, desired_npb, &dontcare); + *blocks = max(1ULL, *blocks); + + /* + * Compute the number of records that we will actually put in each + * block, assuming that we want to spread the records evenly between + * the blocks. Take care that the effective # of items per block (npb) + * won't exceed maxrecs even for the blocks that get an extra record, + * since desired_npb could be maxrecs, and in the previous step we + * rounded the block count down. 
+ */ + npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra); + if (npb > maxnr || (npb == maxnr && *blocks_with_extra > 0)) { + (*blocks)++; + npb = div64_u64_rem(nr_this_level, *blocks, blocks_with_extra); + } + + *avg_per_block = min_t(uint64_t, npb, nr_this_level); + + trace_xfs_btree_bload_level_geometry(cur, level, nr_this_level, + *avg_per_block, desired_npb, *blocks, + *blocks_with_extra); +} + +/* + * Ensure a slack value is appropriate for the btree. + * + * If the slack value is negative, set slack so that we fill the block to + * halfway between minrecs and maxrecs. Make sure the slack is never so large + * that we can underflow minrecs. + */ +static void +xfs_btree_bload_ensure_slack( + struct xfs_btree_cur *cur, + int *slack, + int level) +{ + int maxr; + int minr; + + maxr = cur->bc_ops->get_maxrecs(cur, level); + minr = cur->bc_ops->get_minrecs(cur, level); + + /* + * If slack is negative, automatically set slack so that we load the + * btree block approximately halfway between minrecs and maxrecs. + * Generally, this will net us 75% loading. + */ + if (*slack < 0) + *slack = maxr - ((maxr + minr) >> 1); + + *slack = min(*slack, maxr - minr); +} + +/* + * Prepare a btree cursor for a bulk load operation by computing the geometry + * fields in bbl. Caller must ensure that the btree cursor is a staging + * cursor. This function can be called multiple times. + */ +int +xfs_btree_bload_compute_geometry( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + uint64_t nr_records) +{ + uint64_t nr_blocks = 0; + uint64_t nr_this_level; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + /* + * Make sure that the slack values make sense for traditional leaf and + * node blocks. Inode-rooted btrees will return different minrecs and + * maxrecs values for the root block (level == bc_nlevels - 1). We're + * checking levels 0 and 1 here, so set bc_nlevels such that the btree + * code doesn't interpret either as the root level. 
+ */ + cur->bc_nlevels = XFS_BTREE_MAXLEVELS - 1; + xfs_btree_bload_ensure_slack(cur, &bbl->leaf_slack, 0); + xfs_btree_bload_ensure_slack(cur, &bbl->node_slack, 1); + + bbl->nr_records = nr_this_level = nr_records; + for (cur->bc_nlevels = 1; cur->bc_nlevels < XFS_BTREE_MAXLEVELS;) { + uint64_t level_blocks; + uint64_t dontcare64; + unsigned int level = cur->bc_nlevels - 1; + unsigned int avg_per_block; + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &level_blocks, &dontcare64); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + /* + * If all the items we want to store at this level + * would fit in the inode root block, then we have our + * btree root and are done. + * + * Note that bmap btrees forbid records in the root. + */ + if (level != 0 && nr_this_level <= avg_per_block) { + nr_blocks++; + break; + } + + /* + * Otherwise, we have to store all the items for this + * level in traditional btree blocks and therefore need + * another level of btree to point to those blocks. + * + * We have to re-compute the geometry for each level of + * an inode-rooted btree because the geometry differs + * between a btree root in an inode fork and a + * traditional btree block. + * + * This distinction is made in the btree code based on + * whether level == bc_nlevels - 1. Based on the + * previous root block size check against the root + * block geometry, we know that we aren't yet ready to + * populate the root. Increment bc_nlevels and + * recalculate the geometry for a traditional + * block-based btree level. + */ + cur->bc_nlevels++; + xfs_btree_bload_level_geometry(cur, bbl, level, + nr_this_level, &avg_per_block, + &level_blocks, &dontcare64); + } else { + /* + * If all the items we want to store at this level + * would fit in a single root block, we're done. + */ + if (nr_this_level <= avg_per_block) { + nr_blocks++; + break; + } + + /* Otherwise, we need another level of btree. 
*/ + cur->bc_nlevels++; + } + + nr_blocks += level_blocks; + nr_this_level = level_blocks; + } + + if (cur->bc_nlevels == XFS_BTREE_MAXLEVELS) + return -EOVERFLOW; + + bbl->btree_height = cur->bc_nlevels; + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + bbl->nr_blocks = nr_blocks - 1; + else + bbl->nr_blocks = nr_blocks; + return 0; +} + +/* Bulk load a btree given the parameters and geometry established in bbl. */ +int +xfs_btree_bload( + struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, + void *priv) +{ + struct list_head buffers_list; + union xfs_btree_ptr child_ptr; + union xfs_btree_ptr ptr; + struct xfs_buf *bp = NULL; + struct xfs_btree_block *block = NULL; + uint64_t nr_this_level = bbl->nr_records; + uint64_t blocks; + uint64_t i; + uint64_t blocks_with_extra; + uint64_t total_blocks = 0; + unsigned int avg_per_block; + unsigned int level = 0; + int ret; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + INIT_LIST_HEAD(&buffers_list); + cur->bc_nlevels = bbl->btree_height; + xfs_btree_set_ptr_null(cur, &child_ptr); + xfs_btree_set_ptr_null(cur, &ptr); + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &blocks, &blocks_with_extra); + + /* Load each leaf block. */ + for (i = 0; i < blocks; i++) { + unsigned int nr_this_block = avg_per_block; + + /* + * Due to rounding, btree blocks will not be evenly populated + * in most cases. blocks_with_extra tells us how many blocks + * will receive an extra record to distribute the excess across + * the current level as evenly as possible. 
+ */ + if (i < blocks_with_extra) + nr_this_block++; + + ret = xfs_btree_bload_prep_block(cur, bbl, &buffers_list, level, + nr_this_block, &ptr, &bp, &block, priv); + if (ret) + goto out; + + trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr, + nr_this_block); + + ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record, + block, priv); + if (ret) + goto out; + + /* + * Record the leftmost leaf pointer so we know where to start + * with the first node level. + */ + if (i == 0) + xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1); + } + total_blocks += blocks; + xfs_btree_bload_drop_buf(&buffers_list, &bp); + + /* Populate the internal btree nodes. */ + for (level = 1; level < cur->bc_nlevels; level++) { + union xfs_btree_ptr first_ptr; + + nr_this_level = blocks; + block = NULL; + xfs_btree_set_ptr_null(cur, &ptr); + + xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, + &avg_per_block, &blocks, &blocks_with_extra); + + /* Load each node block. */ + for (i = 0; i < blocks; i++) { + unsigned int nr_this_block = avg_per_block; + + if (i < blocks_with_extra) + nr_this_block++; + + ret = xfs_btree_bload_prep_block(cur, bbl, + &buffers_list, level, nr_this_block, + &ptr, &bp, &block, priv); + if (ret) + goto out; + + trace_xfs_btree_bload_block(cur, level, i, blocks, + &ptr, nr_this_block); + + ret = xfs_btree_bload_node(cur, nr_this_block, + &child_ptr, block); + if (ret) + goto out; + + /* + * Record the leftmost node pointer so that we know + * where to start the next node level above this one. + */ + if (i == 0) + xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1); + } + total_blocks += blocks; + xfs_btree_bload_drop_buf(&buffers_list, &bp); + xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1); + } + + /* Initialize the new root. 
*/ + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); + cur->bc_ino.ifake->if_levels = cur->bc_nlevels; + cur->bc_ino.ifake->if_blocks = total_blocks - 1; + } else { + cur->bc_ag.afake->af_root = be32_to_cpu(ptr.s); + cur->bc_ag.afake->af_levels = cur->bc_nlevels; + cur->bc_ag.afake->af_blocks = total_blocks; + } + + /* + * Write the new blocks to disk. If the ordered list isn't empty after + * that, then something went wrong and we have to fail. This should + * never happen, but we'll check anyway. + */ + ret = xfs_buf_delwri_submit(&buffers_list); + if (ret) + goto out; + if (!list_empty(&buffers_list)) { + ASSERT(list_empty(&buffers_list)); + ret = -EIO; + } + +out: + xfs_buf_delwri_cancel(&buffers_list); + if (bp) + xfs_buf_relse(bp); + return ret; +} diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h new file mode 100644 index 000000000000..f0d2976050ae --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_BTREE_STAGING_H__ +#define __XFS_BTREE_STAGING_H__ + +/* Fake root for an AG-rooted btree. */ +struct xbtree_afakeroot { + /* AG block number of the new btree root. */ + xfs_agblock_t af_root; + + /* Height of the new btree. */ + unsigned int af_levels; + + /* Number of blocks used by the btree. */ + unsigned int af_blocks; +}; + +/* Cursor interactions with fake roots for AG-rooted btrees. */ +void xfs_btree_stage_afakeroot(struct xfs_btree_cur *cur, + struct xbtree_afakeroot *afake); +void xfs_btree_commit_afakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, + struct xfs_buf *agbp, const struct xfs_btree_ops *ops); + +/* Fake root for an inode-rooted btree. */ +struct xbtree_ifakeroot { + /* Fake inode fork. */ + struct xfs_ifork *if_fork; + + /* Number of blocks used by the btree. 
*/ + int64_t if_blocks; + + /* Height of the new btree. */ + unsigned int if_levels; + + /* Number of bytes available for this fork in the inode. */ + unsigned int if_fork_size; + + /* Fork format. */ + unsigned int if_format; + + /* Number of records. */ + unsigned int if_extents; +}; + +/* Cursor interactions with fake roots for inode-rooted btrees. */ +void xfs_btree_stage_ifakeroot(struct xfs_btree_cur *cur, + struct xbtree_ifakeroot *ifake, + struct xfs_btree_ops **new_ops); +void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, + int whichfork, const struct xfs_btree_ops *ops); + +/* Bulk loading of staged btrees. */ +typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv); +typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, void *priv); +typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, + unsigned int nr_this_level, void *priv); + +struct xfs_btree_bload { + /* + * This function will be called nr_records times to load records into + * the btree. The function does this by setting the cursor's bc_rec + * field in in-core format. Records must be returned in sort order. + */ + xfs_btree_bload_get_record_fn get_record; + + /* + * This function will be called nr_blocks times to obtain a pointer + * to a new btree block on disk. Callers must preallocate all space + * for the new btree before calling xfs_btree_bload, and this function + * is what claims that reservation. + */ + xfs_btree_bload_claim_block_fn claim_block; + + /* + * This function should return the size of the in-core btree root + * block. It is only necessary for XFS_BTREE_ROOT_IN_INODE btree + * types. + */ + xfs_btree_bload_iroot_size_fn iroot_size; + + /* + * The caller should set this to the number of records that will be + * stored in the new btree. + */ + uint64_t nr_records; + + /* + * Number of free records to leave in each leaf block. 
If the caller + * sets this to -1, the slack value will be calculated to be halfway + * between maxrecs and minrecs. This typically leaves the block 75% + * full. Note that slack values are not enforced on inode root blocks. + */ + int leaf_slack; + + /* + * Number of free key/ptrs pairs to leave in each node block. This + * field has the same semantics as leaf_slack. + */ + int node_slack; + + /* + * The xfs_btree_bload_compute_geometry function will set this to the + * number of btree blocks needed to store nr_records records. + */ + uint64_t nr_blocks; + + /* + * The xfs_btree_bload_compute_geometry function will set this to the + * height of the new btree. + */ + unsigned int btree_height; +}; + +int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur, + struct xfs_btree_bload *bbl, uint64_t nr_records); +int xfs_btree_bload(struct xfs_btree_cur *cur, struct xfs_btree_bload *bbl, + void *priv); + +#endif /* __XFS_BTREE_STAGING_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 875e04f82541..e46bc03365db 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -78,10 +78,16 @@ kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ * Allocate a dir-state structure. * We don't put them on the stack since they're large. 
*/ -xfs_da_state_t * -xfs_da_state_alloc(void) +struct xfs_da_state * +xfs_da_state_alloc( + struct xfs_da_args *args) { - return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); + struct xfs_da_state *state; + + state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL); + state->args = args; + state->mp = args->dp->i_mount; + return state; } /* @@ -590,7 +596,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { - xfs_buf_corruption_error(oldblk->bp); + xfs_buf_mark_corrupt(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -603,7 +609,7 @@ xfs_da3_split( node = oldblk->bp->b_addr; if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { - xfs_buf_corruption_error(oldblk->bp); + xfs_buf_mark_corrupt(oldblk->bp); error = -EFSCORRUPTED; goto out; } @@ -1624,7 +1630,7 @@ xfs_da3_node_lookup_int( } if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } @@ -1639,7 +1645,7 @@ xfs_da3_node_lookup_int( /* Tree taller than we can handle; bail out! 
*/ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } @@ -1647,7 +1653,7 @@ xfs_da3_node_lookup_int( if (blkno == args->geo->leafblk) expected_level = nodehdr.level - 1; else if (expected_level != nodehdr.level) { - xfs_buf_corruption_error(blk->bp); + xfs_buf_mark_corrupt(blk->bp); return -EFSCORRUPTED; } else expected_level--; @@ -1986,7 +1992,8 @@ xfs_da3_path_shift( ASSERT(path != NULL); ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); level = (path->active-1) - 1; /* skip bottom layer in path */ - for (blk = &path->blk[level]; level >= 0; blk--, level--) { + for (; level >= 0; level--) { + blk = &path->blk[level]; xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, blk->bp->b_addr); @@ -2520,8 +2527,10 @@ xfs_dabuf_map( */ if (nirecs > 1) { map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS); - if (!map) + if (!map) { + error = -ENOMEM; goto out_free_irecs; + } *mapp = map; } diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 0f4fbb0889ff..ad5dd324631a 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. 
@@ -57,9 +57,10 @@ typedef struct xfs_da_args { const uint8_t *name; /* string (maybe not NULL terminated) */ int namelen; /* length of string (maybe no NULL) */ uint8_t filetype; /* filetype of inode for directories */ - uint8_t *value; /* set of bytes (maybe contain NULLs) */ + void *value; /* set of bytes (maybe contain NULLs) */ int valuelen; /* length of value */ - int flags; /* argument flags (eg: ATTR_NOCREATE) */ + unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ + unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */ xfs_dahash_t hashval; /* hash value of name */ xfs_ino_t inumber; /* input/output inode number */ struct xfs_inode *dp; /* directory inode to manipulate */ @@ -88,8 +89,7 @@ typedef struct xfs_da_args { #define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ -#define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ -#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */ +#define XFS_DA_OP_NOTIME 0x0020 /* don't update inode timestamps */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ @@ -97,8 +97,7 @@ typedef struct xfs_da_args { { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ - { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \ - { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" } + { XFS_DA_OP_NOTIME, "NOTIME" } /* * Storage for holding state during Btree searches and split/join ops. 
@@ -220,7 +219,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, const unsigned char *name, int len); -xfs_da_state_t *xfs_da_state_alloc(void); +struct xfs_da_state *xfs_da_state_alloc(struct xfs_da_args *args); void xfs_da_state_free(xfs_da_state_t *state); void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 734837a9b51a..059ac108b1b3 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * Copyright (c) 2013 Red Hat, Inc. @@ -692,19 +692,7 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) #define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) #define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) - -/* - * Conversion macros for converting namespace bits from argument flags - * to ondisk flags. - */ -#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) -#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) -#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ - ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) -#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ - ((x) & XFS_ATTR_SECURE ? 
ATTR_SECURE : 0)) /* * Alignment for namelist and valuelist entries (since they are mixed diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 22557527cfdb..d8f586256add 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -178,6 +178,18 @@ static const struct xfs_defer_op_type *defer_op_types[] = { [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type, }; +static void +xfs_defer_create_intent( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp, + bool sort) +{ + const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; + + dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, + dfp->dfp_count, sort); +} + /* * For each pending item in the intake list, log its intent item and the * associated extents, then add the entire intake list to the end of @@ -187,17 +199,11 @@ STATIC void xfs_defer_create_intents( struct xfs_trans *tp) { - struct list_head *li; struct xfs_defer_pending *dfp; - const struct xfs_defer_op_type *ops; list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { - ops = defer_op_types[dfp->dfp_type]; - dfp->dfp_intent = ops->create_intent(tp, dfp->dfp_count); trace_xfs_defer_create_intent(tp->t_mountp, dfp); - list_sort(tp->t_mountp, &dfp->dfp_work, ops->diff_items); - list_for_each(li, &dfp->dfp_work) - ops->log_item(tp, dfp->dfp_intent, li); + xfs_defer_create_intent(tp, dfp, true); } } @@ -234,10 +240,13 @@ xfs_defer_trans_roll( struct xfs_log_item *lip; struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; + unsigned int ordered = 0; /* bitmap */ int bpcount = 0, ipcount = 0; int i; int error; + BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS); + list_for_each_entry(lip, &tp->t_items, li_trans) { switch (lip->li_type) { case XFS_LI_BUF: @@ -248,7 +257,10 @@ xfs_defer_trans_roll( ASSERT(0); return -EFSCORRUPTED; } - xfs_trans_dirty_buf(tp, bli->bli_buf); + if (bli->bli_flags & XFS_BLI_ORDERED) + ordered |= (1U << bpcount); + else + 
xfs_trans_dirty_buf(tp, bli->bli_buf); bplist[bpcount++] = bli->bli_buf; } break; @@ -289,6 +301,8 @@ xfs_defer_trans_roll( /* Rejoin the buffers and dirty them so the log moves forward. */ for (i = 0; i < bpcount; i++) { xfs_trans_bjoin(tp, bplist[i]); + if (ordered & (1U << i)) + xfs_trans_ordered_buf(tp, bplist[i]); xfs_trans_bhold(tp, bplist[i]); } @@ -346,6 +360,53 @@ xfs_defer_cancel_list( } /* + * Log an intent-done item for the first pending intent, and finish the work + * items. + */ +static int +xfs_defer_finish_one( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp) +{ + const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; + struct xfs_btree_cur *state = NULL; + struct list_head *li, *n; + int error; + + trace_xfs_defer_pending_finish(tp->t_mountp, dfp); + + dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count); + list_for_each_safe(li, n, &dfp->dfp_work) { + list_del(li); + dfp->dfp_count--; + error = ops->finish_item(tp, dfp->dfp_done, li, &state); + if (error == -EAGAIN) { + /* + * Caller wants a fresh transaction; put the work item + * back on the list and log a new log intent item to + * replace the old one. See "Requesting a Fresh + * Transaction while Finishing Deferred Work" above. + */ + list_add(li, &dfp->dfp_work); + dfp->dfp_count++; + dfp->dfp_done = NULL; + xfs_defer_create_intent(tp, dfp, false); + } + + if (error) + goto out; + } + + /* Done with the dfp, free it. */ + list_del(&dfp->dfp_list); + kmem_free(dfp); +out: + if (ops->finish_cleanup) + ops->finish_cleanup(tp, state, error); + return error; +} + +/* * Finish all the pending work. 
This involves logging intent items for * any work items that wandered in since the last transaction roll (if * one has even happened), rolling the transaction, and finishing the @@ -358,11 +419,7 @@ xfs_defer_finish_noroll( struct xfs_trans **tp) { struct xfs_defer_pending *dfp; - struct list_head *li; - struct list_head *n; - void *state; int error = 0; - const struct xfs_defer_op_type *ops; LIST_HEAD(dop_pending); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -371,87 +428,30 @@ xfs_defer_finish_noroll( /* Until we run out of pending work to finish... */ while (!list_empty(&dop_pending) || !list_empty(&(*tp)->t_dfops)) { - /* log intents and pull in intake items */ xfs_defer_create_intents(*tp); list_splice_tail_init(&(*tp)->t_dfops, &dop_pending); - /* - * Roll the transaction. - */ error = xfs_defer_trans_roll(tp); if (error) - goto out; + goto out_shutdown; - /* Log an intent-done item for the first pending item. */ dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, dfp_list); - ops = defer_op_types[dfp->dfp_type]; - trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp); - dfp->dfp_done = ops->create_done(*tp, dfp->dfp_intent, - dfp->dfp_count); - - /* Finish the work items. */ - state = NULL; - list_for_each_safe(li, n, &dfp->dfp_work) { - list_del(li); - dfp->dfp_count--; - error = ops->finish_item(*tp, li, dfp->dfp_done, - &state); - if (error == -EAGAIN) { - /* - * Caller wants a fresh transaction; - * put the work item back on the list - * and jump out. - */ - list_add(li, &dfp->dfp_work); - dfp->dfp_count++; - break; - } else if (error) { - /* - * Clean up after ourselves and jump out. - * xfs_defer_cancel will take care of freeing - * all these lists and stuff. - */ - if (ops->finish_cleanup) - ops->finish_cleanup(*tp, state, error); - goto out; - } - } - if (error == -EAGAIN) { - /* - * Caller wants a fresh transaction, so log a - * new log intent item to replace the old one - * and roll the transaction. 
See "Requesting - * a Fresh Transaction while Finishing - * Deferred Work" above. - */ - dfp->dfp_intent = ops->create_intent(*tp, - dfp->dfp_count); - dfp->dfp_done = NULL; - list_for_each(li, &dfp->dfp_work) - ops->log_item(*tp, dfp->dfp_intent, li); - } else { - /* Done with the dfp, free it. */ - list_del(&dfp->dfp_list); - kmem_free(dfp); - } - - if (ops->finish_cleanup) - ops->finish_cleanup(*tp, state, error); - } - -out: - if (error) { - xfs_defer_trans_abort(*tp, &dop_pending); - xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); - trace_xfs_defer_finish_error(*tp, error); - xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending); - xfs_defer_cancel(*tp); - return error; + error = xfs_defer_finish_one(*tp, dfp); + if (error && error != -EAGAIN) + goto out_shutdown; } trace_xfs_defer_finish_done(*tp, _RET_IP_); return 0; + +out_shutdown: + xfs_defer_trans_abort(*tp, &dop_pending); + xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); + trace_xfs_defer_finish_error(*tp, error); + xfs_defer_cancel_list((*tp)->t_mountp, &dop_pending); + xfs_defer_cancel(*tp); + return error; } int diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 7c28d7608ac6..6b2ca580f2b0 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Copyright (C) 2016 Oracle. All Rights Reserved. * Author: Darrick J. 
Wong <darrick.wong@oracle.com> @@ -6,6 +6,7 @@ #ifndef __XFS_DEFER_H__ #define __XFS_DEFER_H__ +struct xfs_btree_cur; struct xfs_defer_op_type; /* @@ -28,8 +29,8 @@ enum xfs_defer_ops_type { struct xfs_defer_pending { struct list_head dfp_list; /* pending items */ struct list_head dfp_work; /* work items */ - void *dfp_intent; /* log intent item */ - void *dfp_done; /* log done item */ + struct xfs_log_item *dfp_intent; /* log intent item */ + struct xfs_log_item *dfp_done; /* log done item */ unsigned int dfp_count; /* # extent items */ enum xfs_defer_ops_type dfp_type; }; @@ -43,15 +44,16 @@ void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp); /* Description of a deferred type. */ struct xfs_defer_op_type { - void (*abort_intent)(void *); - void *(*create_done)(struct xfs_trans *, void *, unsigned int); - int (*finish_item)(struct xfs_trans *, struct list_head *, void *, - void **); - void (*finish_cleanup)(struct xfs_trans *, void *, int); - void (*cancel_item)(struct list_head *); - int (*diff_items)(void *, struct list_head *, struct list_head *); - void *(*create_intent)(struct xfs_trans *, uint); - void (*log_item)(struct xfs_trans *, void *, struct list_head *); + struct xfs_log_item *(*create_intent)(struct xfs_trans *tp, + struct list_head *items, unsigned int count, bool sort); + void (*abort_intent)(struct xfs_log_item *intent); + struct xfs_log_item *(*create_done)(struct xfs_trans *tp, + struct xfs_log_item *intent, unsigned int count); + int (*finish_item)(struct xfs_trans *tp, struct xfs_log_item *done, + struct list_head *item, struct xfs_btree_cur **state); + void (*finish_cleanup)(struct xfs_trans *tp, + struct xfs_btree_cur *state, int error); + void (*cancel_item)(struct list_head *item); unsigned int max_items; }; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index dd6fcaaea318..612a9c5e41b1 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -278,7 +278,7 @@ xfs_dir_createname( if (!inum) 
args->op_flags |= XFS_DA_OP_JUSTCHECK; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_addname(args); goto out_free; } @@ -373,7 +373,7 @@ xfs_dir_lookup( args->op_flags |= XFS_DA_OP_CILOOKUP; lock_mode = xfs_ilock_data_map_shared(dp); - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_lookup(args); goto out_check_rval; } @@ -443,7 +443,7 @@ xfs_dir_removename( args->whichfork = XFS_DATA_FORK; args->trans = tp; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_removename(args); goto out_free; } @@ -504,7 +504,7 @@ xfs_dir_replace( args->whichfork = XFS_DATA_FORK; args->trans = tp; - if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { + if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { rval = xfs_dir2_sf_replace(args); goto out_free; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 033777e282f2..e55378640b05 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. 
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index d6ced59b9567..5b59d3f7746b 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -114,6 +114,23 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = { .verify_struct = xfs_dir3_block_verify, }; +static xfs_failaddr_t +xfs_dir3_block_header_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (be64_to_cpu(hdr3->owner) != dp->i_ino) + return __this_address; + } + + return NULL; +} + int xfs_dir3_block_read( struct xfs_trans *tp, @@ -121,12 +138,24 @@ xfs_dir3_block_read( struct xfs_buf **bpp) { struct xfs_mount *mp = dp->i_mount; + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, 0, bpp, XFS_DATA_FORK, &xfs_dir3_block_buf_ops); - if (!err && tp && *bpp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. */ + fa = xfs_dir3_block_header_check(dp, *bpp); + if (fa) { + __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + return -EFSCORRUPTED; + } + + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); return err; } @@ -1075,7 +1104,7 @@ xfs_dir2_sf_to_block( ASSERT(ifp->if_bytes == dp->i_d.di_size); ASSERT(ifp->if_u1.if_data != NULL); ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); - ASSERT(dp->i_d.di_nextents == 0); + ASSERT(dp->i_df.if_nextents == 0); /* * Copy the directory into a temporary buffer. 
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index b9eba8213180..375b3edb2ad2 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -394,6 +394,22 @@ static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { .verify_write = xfs_dir3_data_write_verify, }; +static xfs_failaddr_t +xfs_dir3_data_header_check( + struct xfs_inode *dp, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = dp->i_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; + + if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) + return __this_address; + } + + return NULL; +} int xfs_dir3_data_read( @@ -403,12 +419,24 @@ xfs_dir3_data_read( unsigned int flags, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK, &xfs_dir3_data_buf_ops); - if (!err && tp && *bpp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); + if (err || !*bpp) + return err; + + /* Check things that we can't do in the verifier. 
*/ + fa = xfs_dir3_data_header_check(dp, *bpp); + if (fa) { + __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + return -EFSCORRUPTED; + } + + xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); return err; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index a131b520aac7..95d2a3f92d75 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -1383,7 +1383,7 @@ xfs_dir2_leaf_removename( ltp = xfs_dir2_leaf_tail_p(geo, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[db]) != oldbest) { - xfs_buf_corruption_error(lbp); + xfs_buf_mark_corrupt(lbp); return -EFSCORRUPTED; } /* diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index a0cc5e240306..5d51265d29d6 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -194,6 +194,8 @@ xfs_dir3_free_header_check( return __this_address; if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused)) return __this_address; + if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) + return __this_address; } else { struct xfs_dir2_free_hdr *hdr = bp->b_addr; @@ -226,8 +228,9 @@ __xfs_dir3_free_read( /* Check things that we can't do in the verifier. 
*/ fa = xfs_dir3_free_header_check(dp, fbno, *bpp); if (fa) { - xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); + __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); + *bpp = NULL; return -EFSCORRUPTED; } @@ -439,7 +442,7 @@ xfs_dir2_leaf_to_node( ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); if (be32_to_cpu(ltp->bestcount) > (uint)dp->i_d.di_size / args->geo->blksize) { - xfs_buf_corruption_error(lbp); + xfs_buf_mark_corrupt(lbp); return -EFSCORRUPTED; } @@ -513,7 +516,7 @@ xfs_dir2_leafn_add( * into other peoples memory */ if (index < 0) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -800,7 +803,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_dir3_leaf_check(dp, bp); if (leafhdr.count <= 0) { - xfs_buf_corruption_error(bp); + xfs_buf_mark_corrupt(bp); return -EFSCORRUPTED; } @@ -2012,9 +2015,7 @@ xfs_dir2_node_addname( /* * Allocate and initialize the state (btree cursor). */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; + state = xfs_da_state_alloc(args); /* * Look up the name. We're not supposed to find it, but * this gives us the insertion point. @@ -2083,9 +2084,8 @@ xfs_dir2_node_lookup( /* * Allocate and initialize the btree cursor. */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; + state = xfs_da_state_alloc(args); + /* * Fill in the path to the entry in the cursor. */ @@ -2136,9 +2136,7 @@ xfs_dir2_node_removename( /* * Allocate and initialize the btree cursor. */ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; + state = xfs_da_state_alloc(args); /* Look up the entry we're deleting, set up the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); @@ -2203,9 +2201,7 @@ xfs_dir2_node_replace( /* * Allocate and initialize the btree cursor. 
*/ - state = xfs_da_state_alloc(); - state->args = args; - state->mp = args->dp->i_mount; + state = xfs_da_state_alloc(args); /* * We have to save new inode number and ftype since diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 01ee0b926572..44c6a77cba05 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. * All Rights Reserved. diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 7b7f6fb2ea3b..2463b5d73447 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -343,7 +343,7 @@ xfs_dir2_block_to_sf( */ ASSERT(dp->i_df.if_bytes == 0); xfs_init_local_fork(dp, XFS_DATA_FORK, sfp, size); - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + dp->i_df.if_format = XFS_DINODE_FMT_LOCAL; dp->i_d.di_size = size; logflags |= XFS_ILOG_DDATA; @@ -710,11 +710,11 @@ xfs_dir2_sf_verify( struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; - struct xfs_ifork *ifp; xfs_ino_t ino; int i; int i8count; @@ -723,9 +723,8 @@ xfs_dir2_sf_verify( int error; uint8_t filetype; - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; size = ifp->if_bytes; @@ -827,9 +826,9 @@ xfs_dir2_sf_create( * If it's currently a zero-length extent file, * convert it to local format. 
*/ - if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { + if (dp->i_df.if_format == XFS_DINODE_FMT_EXTENTS) { dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + dp->i_df.if_format = XFS_DINODE_FMT_LOCAL; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); dp->i_df.if_flags |= XFS_IFINLINE; } @@ -1027,7 +1026,7 @@ xfs_dir2_sf_replace_needblock( int newsize; struct xfs_dir2_sf_hdr *sfp; - if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL) + if (dp->i_df.if_format != XFS_DINODE_FMT_LOCAL) return false; sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index bedc1e752b60..5a2db00b9d5f 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -37,9 +37,10 @@ xfs_failaddr_t xfs_dquot_verify( struct xfs_mount *mp, struct xfs_disk_dquot *ddq, - xfs_dqid_t id, - uint type) /* used only during quotacheck */ + xfs_dqid_t id) /* used only during quotacheck */ { + __u8 ddq_type; + /* * We can encounter an uninitialized dquot buffer for 2 reasons: * 1. 
If we crash while deleting the quotainode(s), and those blks got @@ -60,11 +61,12 @@ xfs_dquot_verify( if (ddq->d_version != XFS_DQUOT_VERSION) return __this_address; - if (type && ddq->d_flags != type) + if (ddq->d_type & ~XFS_DQTYPE_ANY) return __this_address; - if (ddq->d_flags != XFS_DQ_USER && - ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) + ddq_type = ddq->d_type & XFS_DQTYPE_REC_MASK; + if (ddq_type != XFS_DQTYPE_USER && + ddq_type != XFS_DQTYPE_PROJ && + ddq_type != XFS_DQTYPE_GROUP) return __this_address; if (id != -1 && id != be32_to_cpu(ddq->d_id)) @@ -95,14 +97,13 @@ xfs_failaddr_t xfs_dqblk_verify( struct xfs_mount *mp, struct xfs_dqblk *dqb, - xfs_dqid_t id, - uint type) /* used only during quotacheck */ + xfs_dqid_t id) /* used only during quotacheck */ { if (xfs_sb_version_hascrc(&mp->m_sb) && !uuid_equal(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - return xfs_dquot_verify(mp, &dqb->dd_diskdq, id, type); + return xfs_dquot_verify(mp, &dqb->dd_diskdq, id); } /* @@ -113,7 +114,7 @@ xfs_dqblk_repair( struct xfs_mount *mp, struct xfs_dqblk *dqb, xfs_dqid_t id, - uint type) + xfs_dqtype_t type) { /* * Typically, a repair is only requested by quotacheck. 
@@ -123,7 +124,7 @@ xfs_dqblk_repair( dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION; - dqb->dd_diskdq.d_flags = type; + dqb->dd_diskdq.d_type = type; dqb->dd_diskdq.d_id = cpu_to_be32(id); if (xfs_sb_version_hascrc(&mp->m_sb)) { @@ -205,7 +206,7 @@ xfs_dquot_buf_verify( if (i == 0) id = be32_to_cpu(ddq->d_id); - fa = xfs_dqblk_verify(mp, &dqb[i], id + i, 0); + fa = xfs_dqblk_verify(mp, &dqb[i], id + i); if (fa) { if (!readahead) xfs_buf_verifier_error(bp, -EFSCORRUPTED, diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 79e6c4fb1d8a..53b305dea381 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. * Copyright (C) 2017 Oracle. @@ -55,7 +55,8 @@ #define XFS_ERRTAG_FORCE_SCRUB_REPAIR 32 #define XFS_ERRTAG_FORCE_SUMMARY_RECALC 33 #define XFS_ERRTAG_IUNLINK_FALLBACK 34 -#define XFS_ERRTAG_MAX 35 +#define XFS_ERRTAG_BUF_IOERROR 35 +#define XFS_ERRTAG_MAX 36 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -95,5 +96,6 @@ #define XFS_RANDOM_FORCE_SCRUB_REPAIR 1 #define XFS_RANDOM_FORCE_SUMMARY_RECALC 1 #define XFS_RANDOM_IUNLINK_FALLBACK (XFS_RANDOM_DEFAULT/10) +#define XFS_RANDOM_BUF_IOERROR XFS_RANDOM_DEFAULT #endif /* __XFS_ERRORTAG_H_ */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 77e9fa385980..31b7ece985bb 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. 
@@ -497,6 +497,23 @@ static inline bool xfs_sb_version_hascrc(struct xfs_sb *sbp) return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } +/* + * v5 file systems support V3 inodes only, earlier file systems support + * v2 and v1 inodes. + */ +static inline bool xfs_sb_version_has_v3inode(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; +} + +static inline bool xfs_dinode_good_version(struct xfs_sb *sbp, + uint8_t version) +{ + if (xfs_sb_version_has_v3inode(sbp)) + return version == 3; + return version == 1 || version == 2; +} + static inline bool xfs_sb_version_has_pquotino(struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; @@ -560,7 +577,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) -#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ @@ -707,7 +723,6 @@ typedef struct xfs_agf { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) -#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) /* * Size of the unlinked inode hash table in the agi. @@ -775,7 +790,6 @@ typedef struct xfs_agi { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) -#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) /* * The third a.g. block contains the a.g. 
freelist, an array @@ -783,21 +797,15 @@ typedef struct xfs_agi { */ #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) -#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) - -#define XFS_BUF_TO_AGFL_BNO(mp, bp) \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ - &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ - (__be32 *)(bp)->b_addr) +#define XFS_BUF_TO_AGFL(bp) ((struct xfs_agfl *)((bp)->b_addr)) -typedef struct xfs_agfl { +struct xfs_agfl { __be32 agfl_magicnum; __be32 agfl_seqno; uuid_t agfl_uuid; __be64 agfl_lsn; __be32 agfl_crc; - __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */ -} __attribute__((packed)) xfs_agfl_t; +} __attribute__((packed)); #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) @@ -946,23 +954,22 @@ enum xfs_dinode_fmt { /* * Inode size for given fs. */ -#define XFS_LITINO(mp, version) \ - ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) +#define XFS_DINODE_SIZE(sbp) \ + (xfs_sb_version_has_v3inode(sbp) ? \ + sizeof(struct xfs_dinode) : \ + offsetof(struct xfs_dinode, di_crc)) +#define XFS_LITINO(mp) \ + ((mp)->m_sb.sb_inodesize - XFS_DINODE_SIZE(&(mp)->m_sb)) /* * Inode data & attribute fork sizes, per inode. */ -#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0) #define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ - (XFS_DFORK_Q(dip) ? \ - XFS_DFORK_BOFF(dip) : \ - XFS_LITINO(mp, (dip)->di_version)) + ((dip)->di_forkoff ? XFS_DFORK_BOFF(dip) : XFS_LITINO(mp)) #define XFS_DFORK_ASIZE(dip,mp) \ - (XFS_DFORK_Q(dip) ? \ - XFS_LITINO(mp, (dip)->di_version) - XFS_DFORK_BOFF(dip) : \ - 0) + ((dip)->di_forkoff ? XFS_LITINO(mp) - XFS_DFORK_BOFF(dip) : 0) #define XFS_DFORK_SIZE(dip,mp,w) \ ((w) == XFS_DATA_FORK ? 
\ XFS_DFORK_DSIZE(dip, mp) : \ @@ -1142,16 +1149,26 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ #define XFS_DQUOT_VERSION (uint8_t)0x01 /* latest version number */ +#define XFS_DQTYPE_USER 0x01 /* user dquot record */ +#define XFS_DQTYPE_PROJ 0x02 /* project dquot record */ +#define XFS_DQTYPE_GROUP 0x04 /* group dquot record */ + +/* bitmask to determine if this is a user/group/project dquot */ +#define XFS_DQTYPE_REC_MASK (XFS_DQTYPE_USER | \ + XFS_DQTYPE_PROJ | \ + XFS_DQTYPE_GROUP) + +#define XFS_DQTYPE_ANY (XFS_DQTYPE_REC_MASK) + /* - * This is the main portion of the on-disk representation of quota - * information for a user. This is the q_core of the struct xfs_dquot that - * is kept in kernel memory. We pad this with some more expansion room - * to construct the on disk structure. + * This is the main portion of the on-disk representation of quota information + * for a user. We pad this with some more expansion room to construct the on + * disk structure. */ struct xfs_disk_dquot { __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ __u8 d_version; /* dquot version */ - __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ + __u8 d_type; /* XFS_DQTYPE_USER/PROJ/GROUP */ __be32 d_id; /* user,project,group id */ __be64 d_blk_hardlimit;/* absolute limit on disk blks */ __be64 d_blk_softlimit;/* preferred limit on disk blks */ @@ -1192,6 +1209,22 @@ typedef struct xfs_dqblk { #define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) /* + * This defines the unit of allocation of dquots. + * + * Currently, it is just one file system block, and a 4K blk contains 30 + * (136 * 30 = 4080) dquots. It's probably not worth trying to make + * this more dynamic. + * + * However, if this number is changed, we have to make sure that we don't + * implicitly assume that we do allocations in chunks of a single filesystem + * block in the dquot/xqm code. 
+ * + * This is part of the ondisk format because the structure size is not a power + * of two, which leaves slack at the end of the disk block. + */ +#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 + +/* * Remote symlink format and access functions. */ #define XFS_SYMLINK_MAGIC 0x58534c4d /* XSLM */ @@ -1673,7 +1706,7 @@ struct xfs_acl_entry { struct xfs_acl { __be32 acl_cnt; - struct xfs_acl_entry acl_entry[0]; + struct xfs_acl_entry acl_entry[]; }; /* diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index ef95ca07d084..84bcffa87753 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: LGPL-2.1 +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (c) 1995-2005 Silicon Graphics, Inc. * All Rights Reserved. @@ -568,10 +568,40 @@ typedef struct xfs_fsop_setdm_handlereq { struct fsdmidata __user *data; /* DMAPI data */ } xfs_fsop_setdm_handlereq_t; +/* + * Flags passed in xfs_attr_multiop.am_flags for the attr ioctl interface. + * + * NOTE: Must match the values declared in libattr without the XFS_IOC_ prefix. + */ +#define XFS_IOC_ATTR_ROOT 0x0002 /* use attrs in root namespace */ +#define XFS_IOC_ATTR_SECURE 0x0008 /* use attrs in security namespace */ +#define XFS_IOC_ATTR_CREATE 0x0010 /* fail if attr already exists */ +#define XFS_IOC_ATTR_REPLACE 0x0020 /* fail if attr does not exist */ + typedef struct xfs_attrlist_cursor { __u32 opaque[4]; } xfs_attrlist_cursor_t; +/* + * Define how lists of attribute names are returned to userspace from the + * XFS_IOC_ATTRLIST_BY_HANDLE ioctl. struct xfs_attrlist is the header at the + * beginning of the returned buffer, and a each entry in al_offset contains the + * relative offset of an xfs_attrlist_ent containing the actual entry. + * + * NOTE: struct xfs_attrlist must match struct attrlist defined in libattr, and + * struct xfs_attrlist_ent must match struct attrlist_ent defined in libattr. 
+ */ +struct xfs_attrlist { + __s32 al_count; /* number of entries in attrlist */ + __s32 al_more; /* T/F: more attrs (do call again) */ + __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */ +}; + +struct xfs_attrlist_ent { /* data from attr_list() */ + __u32 a_valuelen; /* number bytes in value of attr */ + char a_name[1]; /* attr name (NULL terminated) */ +}; + typedef struct xfs_fsop_attrlist_handlereq { struct xfs_fsop_handlereq hreq; /* handle interface structure */ struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ @@ -589,7 +619,7 @@ typedef struct xfs_attr_multiop { void __user *am_attrname; void __user *am_attrvalue; __u32 am_length; - __u32 am_flags; + __u32 am_flags; /* XFS_IOC_ATTR_* */ } xfs_attr_multiop_t; typedef struct xfs_fsop_attrmulti_handlereq { diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 272005ac8c88..99e796256c5d 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index bf161e930f1d..f742a96a2fe1 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -105,7 +105,7 @@ xfs_inobt_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; uint64_t realfree; @@ -177,7 +177,7 @@ xfs_inobt_insert( xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agino_t thisino; int i; @@ -304,7 +304,7 @@ xfs_ialloc_inode_init( * That means for v3 inode we log the entire buffer rather than just the * inode cores. 
*/ - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno)); @@ -339,7 +339,7 @@ xfs_ialloc_inode_init( xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; - uint isize = xfs_dinode_size(version); + uint isize = XFS_DINODE_SIZE(&mp->m_sb); free = xfs_make_iptr(mp, fbuf, i); free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); @@ -525,7 +525,7 @@ xfs_inobt_insert_sprec( bool merge) /* merge or replace */ { struct xfs_btree_cur *cur; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); int error; int i; @@ -658,7 +658,7 @@ xfs_ialloc_ag_alloc( * chunk of inodes. If the filesystem is striped, this will fill * an entire stripe unit with inodes. */ - agi = XFS_BUF_TO_AGI(agbp); + agi = agbp->b_addr; newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + @@ -888,10 +888,9 @@ sparse_alloc: */ be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - pag = xfs_perag_get(args.mp, agno); + pag = agbp->b_pag; pag->pagi_freecount += newlen; pag->pagi_count += newlen; - xfs_perag_put(pag); agi->agi_newino = cpu_to_be32(newino); /* @@ -1130,11 +1129,11 @@ xfs_dialloc_ag_inobt( xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); - struct xfs_perag *pag; + struct xfs_perag *pag = agbp->b_pag; struct xfs_btree_cur *cur, *tcur; struct xfs_inobt_rec_incore rec, trec; xfs_ino_t ino; @@ -1143,8 +1142,6 @@ xfs_dialloc_ag_inobt( int i, j; int searchdistance = 10; - pag = 
xfs_perag_get(mp, agno); - ASSERT(pag->pagi_init); ASSERT(pag->pagi_inodeok); ASSERT(pag->pagi_freecount > 0); @@ -1384,14 +1381,12 @@ alloc_inode: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); - xfs_perag_put(pag); *inop = ino; return 0; error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); error0: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_perag_put(pag); return error; } @@ -1583,11 +1578,10 @@ xfs_dialloc_ag( xfs_ino_t *inop) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent); xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent); - struct xfs_perag *pag; struct xfs_btree_cur *cur; /* finobt cursor */ struct xfs_btree_cur *icur; /* inobt cursor */ struct xfs_inobt_rec_incore rec; @@ -1599,8 +1593,6 @@ xfs_dialloc_ag( if (!xfs_sb_version_hasfinobt(&mp->m_sb)) return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); - pag = xfs_perag_get(mp, agno); - /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. 
@@ -1667,7 +1659,7 @@ xfs_dialloc_ag( */ be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - pag->pagi_freecount--; + agbp->b_pag->pagi_freecount--; xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); @@ -1680,7 +1672,6 @@ xfs_dialloc_ag( xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); - xfs_perag_put(pag); *inop = ino; return 0; @@ -1688,7 +1679,6 @@ error_icur: xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); error_cur: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - xfs_perag_put(pag); return error; } @@ -1943,9 +1933,8 @@ xfs_difree_inobt( struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); - struct xfs_perag *pag; struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int ilen; @@ -2007,6 +1996,8 @@ xfs_difree_inobt( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { + struct xfs_perag *pag = agbp->b_pag; + xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); @@ -2020,10 +2011,8 @@ xfs_difree_inobt( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - pag = xfs_perag_get(mp, agno); pag->pagi_freecount -= ilen - 1; pag->pagi_count -= ilen; - xfs_perag_put(pag); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -2049,9 +2038,7 @@ xfs_difree_inobt( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - pag = xfs_perag_get(mp, agno); - pag->pagi_freecount++; - xfs_perag_put(pag); + agbp->b_pag->pagi_freecount++; xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -2079,7 +2066,7 @@ xfs_difree_finobt( 
xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_agi *agi = agbp->b_addr; xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno); struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; @@ -2489,9 +2476,8 @@ xfs_ialloc_log_agi( sizeof(xfs_agi_t) }; #ifdef DEBUG - xfs_agi_t *agi; /* allocation group header */ + struct xfs_agi *agi = bp->b_addr; - agi = XFS_BUF_TO_AGI(bp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); #endif @@ -2523,14 +2509,13 @@ xfs_agi_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); + struct xfs_agi *agi = bp->b_addr; int i; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (!xfs_log_check_lsn(mp, - be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn))) + if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn))) return __this_address; } @@ -2593,6 +2578,7 @@ xfs_agi_write_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_agi *agi = bp->b_addr; xfs_failaddr_t fa; fa = xfs_agi_verify(bp); @@ -2605,7 +2591,7 @@ xfs_agi_write_verify( return; if (bip) - XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); + agi->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); } @@ -2661,8 +2647,8 @@ xfs_ialloc_read_agi( if (error) return error; - agi = XFS_BUF_TO_AGI(*bpp); - pag = xfs_perag_get(mp, agno); + agi = (*bpp)->b_addr; + pag = (*bpp)->b_pag; if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); @@ -2675,7 +2661,6 @@ xfs_ialloc_read_agi( */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || XFS_FORCED_SHUTDOWN(mp)); - xfs_perag_put(pag); return 0; } @@ -2873,7 +2858,7 @@ xfs_ialloc_setup_geometry( * cannot change the behavior. 
*/ igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_hascrc(&mp->m_sb)) { + if (xfs_sb_version_has_v3inode(&mp->m_sb)) { int new_size = igeo->inode_cluster_size_raw; new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index b82992f795aa..3c8aebc36e64 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -12,6 +12,7 @@ #include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" @@ -20,7 +21,6 @@ #include "xfs_trans.h" #include "xfs_rmap.h" - STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, @@ -34,7 +34,7 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_ag.agbp, cur->bc_ag.agno, cur->bc_btnum); } @@ -44,8 +44,8 @@ xfs_inobt_set_root( union xfs_btree_ptr *nptr, int inc) /* level change */ { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; agi->agi_root = nptr->s; be32_add_cpu(&agi->agi_level, inc); @@ -58,8 +58,8 @@ xfs_finobt_set_root( union xfs_btree_ptr *nptr, int inc) /* level change */ { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agi *agi = agbp->b_addr; agi->agi_free_root = nptr->s; be32_add_cpu(&agi->agi_free_level, inc); @@ -83,7 +83,7 @@ __xfs_inobt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.oinfo = XFS_RMAP_OINFO_INOBT; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.agno, sbno); args.minlen = 1; args.maxlen = 1; args.prod = 1; @@ -212,9 +212,9 @@ 
xfs_inobt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_root; } @@ -224,9 +224,9 @@ xfs_finobt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + struct xfs_agi *agi = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agi->agi_seqno)); ptr->s = agi->agi_free_root; } @@ -400,32 +400,27 @@ static const struct xfs_btree_ops xfs_finobt_ops = { }; /* - * Allocate a new inode btree cursor. + * Initialize a new inode btree cursor. */ -struct xfs_btree_cur * /* new inode btree cursor */ -xfs_inobt_init_cursor( +static struct xfs_btree_cur * +xfs_inobt_init_common( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agi structure */ xfs_agnumber_t agno, /* allocation group number */ xfs_btnum_t btnum) /* ialloc or free ino btree */ { - struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); struct xfs_btree_cur *cur; - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); - + cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = btnum; if (btnum == XFS_BTNUM_INO) { - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - cur->bc_ops = &xfs_inobt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); + cur->bc_ops = &xfs_inobt_ops; } else { - cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); - cur->bc_ops = &xfs_finobt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); + cur->bc_ops = &xfs_finobt_ops; } cur->bc_blocklog = mp->m_sb.sb_blocklog; @@ -433,12 +428,75 @@ xfs_inobt_init_cursor( if 
(xfs_sb_version_hascrc(&mp->m_sb)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; + cur->bc_ag.agno = agno; + return cur; +} + +/* Create an inode btree cursor. */ +struct xfs_btree_cur * +xfs_inobt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + struct xfs_agi *agi = agbp->b_addr; + cur = xfs_inobt_init_common(mp, tp, agno, btnum); + if (btnum == XFS_BTNUM_INO) + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + else + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur->bc_ag.agbp = agbp; return cur; } +/* Create an inode btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_inobt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno, + xfs_btnum_t btnum) +{ + struct xfs_btree_cur *cur; + + cur = xfs_inobt_init_common(mp, NULL, agno, btnum); + xfs_btree_stage_afakeroot(cur, afake); + return cur; +} + +/* + * Install a new inobt btree root. Caller is responsible for invalidating + * and freeing the old btree blocks. 
+ */ +void +xfs_inobt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agi *agi = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + if (cur->bc_btnum == XFS_BTNUM_INO) { + agi->agi_root = cpu_to_be32(afake->af_root); + agi->agi_level = cpu_to_be32(afake->af_levels); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops); + } else { + agi->agi_free_root = cpu_to_be32(afake->af_root); + agi->agi_free_level = cpu_to_be32(afake->af_levels); + xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREE_ROOT | + XFS_AGI_FREE_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops); + } +} + /* * Calculate number of records in an inobt btree block. */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 951305ecaae1..35bbd978c272 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -48,6 +48,9 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno, + xfs_btnum_t btnum); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); /* ir_holemask to inode allocation bitmap conversion */ @@ -68,4 +71,7 @@ int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_btnum_t btnum, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); +void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8afacfe4be0a..8d5dd08eab75 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ 
-21,41 +21,6 @@ #include <linux/iversion.h> /* - * Check that none of the inode's in the buffer have a next - * unlinked field of 0. - */ -#if defined(DEBUG) -void -xfs_inobp_check( - xfs_mount_t *mp, - xfs_buf_t *bp) -{ - int i; - xfs_dinode_t *dip; - - for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { - dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); - if (!dip->di_next_unlinked) { - xfs_alert(mp, - "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", - i, (long long)bp->b_bn); - } - } -} -#endif - -bool -xfs_dinode_good_version( - struct xfs_mount *mp, - __u8 version) -{ - if (xfs_sb_version_hascrc(&mp->m_sb)) - return version == 3; - - return version == 1 || version == 2; -} - -/* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence * has not had the inode cores stamped into it. Hence for readahead, the buffer @@ -64,10 +29,10 @@ xfs_dinode_good_version( * If the readahead buffer is invalid, we need to mark it with an error and * clear the DONE status of the buffer so that a followup read will re-read it * from disk. We don't report the error otherwise to avoid warnings during log - * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * recovery and we don't get unnecessary panics on debug kernels. We use EIO here * because all we want to do is say readahead failed; there is no-one to report * the error to, so this will distinguish it from a non-ra verifier failure. - * Changes to this readahead error behavour also need to be reflected in + * Changes to this readahead error behaviour also need to be reflected in * xfs_dquot_buf_readahead_verify(). 
*/ static void @@ -93,7 +58,7 @@ xfs_inode_buf_verify( dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = xfs_verify_magic16(bp, dip->di_magic) && - xfs_dinode_good_version(mp, dip->di_version) && + xfs_dinode_good_version(&mp->m_sb, dip->di_version) && xfs_verify_agino_or_null(mp, agno, unlinked_ino); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { @@ -172,8 +137,7 @@ xfs_imap_to_bp( struct xfs_imap *imap, struct xfs_dinode **dipp, struct xfs_buf **bpp, - uint buf_flags, - uint iget_flags) + uint buf_flags) { struct xfs_buf *bp; int error; @@ -183,48 +147,63 @@ xfs_imap_to_bp( (int)imap->im_len, buf_flags, &bp, &xfs_inode_buf_ops); if (error) { - if (error == -EAGAIN) { - ASSERT(buf_flags & XBF_TRYLOCK); - return error; - } - xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", - __func__, error); + ASSERT(error != -EAGAIN || (buf_flags & XBF_TRYLOCK)); return error; } *bpp = bp; - *dipp = xfs_buf_offset(bp, imap->im_boffset); + if (dipp) + *dipp = xfs_buf_offset(bp, imap->im_boffset); return 0; } -void +int xfs_inode_from_disk( struct xfs_inode *ip, struct xfs_dinode *from) { struct xfs_icdinode *to = &ip->i_d; struct inode *inode = VFS_I(ip); + int error; + xfs_failaddr_t fa; + + ASSERT(ip->i_cowfp == NULL); + ASSERT(ip->i_afp == NULL); + + fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, + sizeof(*from), fa); + return -EFSCORRUPTED; + } + /* + * First get the permanent information that is needed to allocate an + * inode. If the inode is unused, mode is zero and we shouldn't mess + * with the uninitialized part of it. 
+ */ + to->di_flushiter = be16_to_cpu(from->di_flushiter); + inode->i_generation = be32_to_cpu(from->di_gen); + inode->i_mode = be16_to_cpu(from->di_mode); + if (!inode->i_mode) + return 0; /* * Convert v1 inodes immediately to v2 inode format as this is the * minimum inode version format we support in the rest of the code. + * They will also be unconditionally written back to disk as v2 inodes. */ - to->di_version = from->di_version; - if (to->di_version == 1) { + if (unlikely(from->di_version == 1)) { set_nlink(inode, be16_to_cpu(from->di_onlink)); to->di_projid = 0; - to->di_version = 2; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | be16_to_cpu(from->di_projid_lo); } - to->di_format = from->di_format; - to->di_uid = be32_to_cpu(from->di_uid); - to->di_gid = be32_to_cpu(from->di_gid); - to->di_flushiter = be16_to_cpu(from->di_flushiter); + i_uid_write(inode, be32_to_cpu(from->di_uid)); + i_gid_write(inode, be32_to_cpu(from->di_gid)); /* * Time is signed, so need to convert to signed 32 bit before @@ -238,21 +217,16 @@ xfs_inode_from_disk( inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec); inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec); inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec); - inode->i_generation = be32_to_cpu(from->di_gen); - inode->i_mode = be16_to_cpu(from->di_mode); to->di_size = be64_to_cpu(from->di_size); to->di_nblocks = be64_to_cpu(from->di_nblocks); to->di_extsize = be32_to_cpu(from->di_extsize); - to->di_nextents = be32_to_cpu(from->di_nextents); - to->di_anextents = be16_to_cpu(from->di_anextents); to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; to->di_dmevmask = be32_to_cpu(from->di_dmevmask); to->di_dmstate = be16_to_cpu(from->di_dmstate); to->di_flags = be16_to_cpu(from->di_flags); - if (to->di_version == 3) { + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { 
inode_set_iversion_queried(inode, be64_to_cpu(from->di_changecount)); to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec); @@ -260,6 +234,22 @@ xfs_inode_from_disk( to->di_flags2 = be64_to_cpu(from->di_flags2); to->di_cowextsize = be32_to_cpu(from->di_cowextsize); } + + error = xfs_iformat_data_fork(ip, from); + if (error) + return error; + if (from->di_forkoff) { + error = xfs_iformat_attr_fork(ip, from); + if (error) + goto out_destroy_data_fork; + } + if (xfs_is_reflink_inode(ip)) + xfs_ifork_init_cow(ip); + return 0; + +out_destroy_data_fork: + xfs_idestroy_fork(&ip->i_df); + return error; } void @@ -274,10 +264,9 @@ xfs_inode_to_disk( to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); to->di_onlink = 0; - to->di_version = from->di_version; - to->di_format = from->di_format; - to->di_uid = cpu_to_be32(from->di_uid); - to->di_gid = cpu_to_be32(from->di_gid); + to->di_format = xfs_ifork_format(&ip->i_df); + to->di_uid = cpu_to_be32(i_uid_read(inode)); + to->di_gid = cpu_to_be32(i_gid_read(inode)); to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff); to->di_projid_hi = cpu_to_be16(from->di_projid >> 16); @@ -295,15 +284,16 @@ xfs_inode_to_disk( to->di_size = cpu_to_be64(from->di_size); to->di_nblocks = cpu_to_be64(from->di_nblocks); to->di_extsize = cpu_to_be32(from->di_extsize); - to->di_nextents = cpu_to_be32(from->di_nextents); - to->di_anextents = cpu_to_be16(from->di_anextents); + to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); + to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp)); to->di_forkoff = from->di_forkoff; - to->di_aformat = from->di_aformat; + to->di_aformat = xfs_ifork_format(ip->i_afp); to->di_dmevmask = cpu_to_be32(from->di_dmevmask); to->di_dmstate = cpu_to_be16(from->di_dmstate); to->di_flags = cpu_to_be16(from->di_flags); - if (from->di_version == 3) { + if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) { + to->di_version = 3; to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = 
cpu_to_be32(from->di_crtime.tv_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec); @@ -315,6 +305,7 @@ xfs_inode_to_disk( uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_flushiter = 0; } else { + to->di_version = 2; to->di_flushiter = cpu_to_be16(from->di_flushiter); } } @@ -417,7 +408,7 @@ xfs_dinode_verify_forkoff( struct xfs_dinode *dip, struct xfs_mount *mp) { - if (!XFS_DFORK_Q(dip)) + if (!dip->di_forkoff) return NULL; switch (dip->di_format) { @@ -428,7 +419,7 @@ xfs_dinode_verify_forkoff( case XFS_DINODE_FMT_LOCAL: /* fall through ... */ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ case XFS_DINODE_FMT_BTREE: - if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) + if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) return __this_address; break; default: @@ -454,7 +445,7 @@ xfs_dinode_verify( /* Verify v3 integrity information first */ if (dip->di_version >= 3) { - if (!xfs_sb_version_hascrc(&mp->m_sb)) + if (!xfs_sb_version_has_v3inode(&mp->m_sb)) return __this_address; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, XFS_DINODE_CRC_OFF)) @@ -520,7 +511,7 @@ xfs_dinode_verify( return __this_address; } - if (XFS_DFORK_Q(dip)) { + if (dip->di_forkoff) { fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); if (fa) return fa; @@ -597,125 +588,6 @@ xfs_dinode_calc_crc( } /* - * Read the disk inode attributes into the in-core inode structure. - * - * For version 5 superblocks, if we are initialising a new inode and we are not - * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new - * inode core with a random generation number. If we are keeping inodes around, - * we need to read the inode cluster to get the existing generation number off - * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode - * format) then log recovery is dependent on the di_flushiter field being - * initialised from the current on-disk value and hence we must also read the - * inode off disk. 
- */ -int -xfs_iread( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_inode_t *ip, - uint iget_flags) -{ - xfs_buf_t *bp; - xfs_dinode_t *dip; - xfs_failaddr_t fa; - int error; - - /* - * Fill in the location information in the in-core inode. - */ - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); - if (error) - return error; - - /* shortcut IO on inode allocation if possible */ - if ((iget_flags & XFS_IGET_CREATE) && - xfs_sb_version_hascrc(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_IKEEP)) { - VFS_I(ip)->i_generation = prandom_u32(); - ip->i_d.di_version = 3; - return 0; - } - - /* - * Get pointers to the on-disk inode and the buffer containing it. - */ - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); - if (error) - return error; - - /* even unallocated inodes are verified */ - fa = xfs_dinode_verify(mp, ip->i_ino, dip); - if (fa) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip, - sizeof(*dip), fa); - error = -EFSCORRUPTED; - goto out_brelse; - } - - /* - * If the on-disk inode is already linked to a directory - * entry, copy all of the inode into the in-core inode. - * xfs_iformat_fork() handles copying in the inode format - * specific information. - * Otherwise, just get the truly permanent information. - */ - if (dip->di_mode) { - xfs_inode_from_disk(ip, dip); - error = xfs_iformat_fork(ip, dip); - if (error) { -#ifdef DEBUG - xfs_alert(mp, "%s: xfs_iformat() returned error %d", - __func__, error); -#endif /* DEBUG */ - goto out_brelse; - } - } else { - /* - * Partial initialisation of the in-core inode. Just the bits - * that xfs_ialloc won't overwrite or relies on being correct. - */ - ip->i_d.di_version = dip->di_version; - VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen); - ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); - - /* - * Make sure to pull in the mode here as well in - * case the inode is released without being used. 
- * This ensures that xfs_inactive() will see that - * the inode is already free and not try to mess - * with the uninitialized part of it. - */ - VFS_I(ip)->i_mode = 0; - } - - ASSERT(ip->i_d.di_version >= 2); - ip->i_delayed_blks = 0; - - /* - * Mark the buffer containing the inode as something to keep - * around for a while. This helps to keep recently accessed - * meta-data in-core longer. - */ - xfs_buf_set_ref(bp, XFS_INO_REF); - - /* - * Use xfs_trans_brelse() to release the buffer containing the on-disk - * inode, because it was acquired with xfs_trans_read_buf() in - * xfs_imap_to_bp() above. If tp is NULL, this is just a normal - * brelse(). If we're within a transaction, then xfs_trans_brelse() - * will only release the buffer if it is not dirty within the - * transaction. It will be OK to release the buffer in this case, - * because inodes on disk are never destroyed and we will be locking the - * new in-core inode before putting it in the cache where other - * processes can find it. Thus we don't have to worry about the inode - * being changed just because we released the buffer. - */ - out_brelse: - xfs_trans_brelse(tp, bp); - return error; -} - -/* * Validate di_extsize hint. * * The rules are documented at xfs_ioctl_setattr_check_extsize(). diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index fd94b1078722..6b08b9d060c2 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -16,19 +16,12 @@ struct xfs_dinode; * format specific structures at the appropriate time. 
*/ struct xfs_icdinode { - int8_t di_version; /* inode version */ - int8_t di_format; /* format of di_c data */ uint16_t di_flushiter; /* incremented on flush */ - uint32_t di_uid; /* owner's user id */ - uint32_t di_gid; /* owner's group id */ uint32_t di_projid; /* owner's project id */ xfs_fsize_t di_size; /* number of bytes in file */ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ - xfs_extnum_t di_nextents; /* number of extents in data fork */ - xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ - int8_t di_aformat; /* format of attr fork's data */ uint32_t di_dmevmask; /* DMIG event mask */ uint16_t di_dmstate; /* DMIG state info */ uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ @@ -51,24 +44,14 @@ struct xfs_imap { int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *, struct xfs_imap *, struct xfs_dinode **, - struct xfs_buf **, uint, uint); -int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, uint); + struct xfs_buf **, uint); void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *); void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to, xfs_lsn_t lsn); -void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); +int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, struct xfs_dinode *to); -bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version); - -#if defined(DEBUG) -void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); -#else -#define xfs_inobp_check(mp, bp) -#endif /* DEBUG */ - xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 
ad2b9c313fd2..0cf853d42d62 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -26,110 +26,6 @@ kmem_zone_t *xfs_ifork_zone; -STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); -STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); -STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); - -/* - * Copy inode type and data and attr format specific information from the - * on-disk inode to the in-core inode and fork structures. For fifos, devices, - * and sockets this means set i_rdev to the proper value. For files, - * directories, and symlinks this means to bring in the in-line data or extent - * pointers as well as the attribute fork. For a fork in B-tree format, only - * the root is immediately brought in-core. The rest will be read in later when - * first referenced (see xfs_iread_extents()). - */ -int -xfs_iformat_fork( - struct xfs_inode *ip, - struct xfs_dinode *dip) -{ - struct inode *inode = VFS_I(ip); - struct xfs_attr_shortform *atp; - int size; - int error = 0; - xfs_fsize_t di_size; - - switch (inode->i_mode & S_IFMT) { - case S_IFIFO: - case S_IFCHR: - case S_IFBLK: - case S_IFSOCK: - ip->i_d.di_size = 0; - inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); - break; - - case S_IFREG: - case S_IFLNK: - case S_IFDIR: - switch (dip->di_format) { - case XFS_DINODE_FMT_LOCAL: - di_size = be64_to_cpu(dip->di_size); - size = (int)di_size; - error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); - break; - case XFS_DINODE_FMT_BTREE: - error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); - break; - default: - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, - dip, sizeof(*dip), __this_address); - return -EFSCORRUPTED; - } - break; - - default: - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, - sizeof(*dip), __this_address); - return -EFSCORRUPTED; - } - if (error) - 
return error; - - if (xfs_is_reflink_inode(ip)) { - ASSERT(ip->i_cowfp == NULL); - xfs_ifork_init_cow(ip); - } - - if (!XFS_DFORK_Q(dip)) - return 0; - - ASSERT(ip->i_afp == NULL); - ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS); - - switch (dip->di_aformat) { - case XFS_DINODE_FMT_LOCAL: - atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); - size = be16_to_cpu(atp->hdr.totsize); - - error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); - break; - case XFS_DINODE_FMT_EXTENTS: - error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); - break; - case XFS_DINODE_FMT_BTREE: - error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); - break; - default: - xfs_inode_verifier_error(ip, error, __func__, dip, - sizeof(*dip), __this_address); - error = -EFSCORRUPTED; - break; - } - if (error) { - kmem_cache_free(xfs_ifork_zone, ip->i_afp); - ip->i_afp = NULL; - if (ip->i_cowfp) - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); - ip->i_cowfp = NULL; - xfs_idestroy_fork(ip, XFS_DATA_FORK); - } - return error; -} - void xfs_init_local_fork( struct xfs_inode *ip, @@ -183,7 +79,7 @@ xfs_iformat_local( */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { xfs_warn(ip->i_mount, - "corrupt inode %Lu (bad size %d for local fork, size = %d).", + "corrupt inode %Lu (bad size %d for local fork, size = %zd).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); xfs_inode_verifier_error(ip, -EFSCORRUPTED, @@ -292,12 +188,11 @@ xfs_iformat_btree( * or the number of extents is greater than the number of * blocks. 
*/ - if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= - XFS_IFORK_MAXEXT(ip, whichfork) || + if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) || nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || - XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) || + ifp->if_nextents > ip->i_d.di_nblocks) || level == 0 || level > XFS_BTREE_MAXLEVELS) { xfs_warn(mp, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); @@ -325,6 +220,110 @@ xfs_iformat_btree( return 0; } +int +xfs_iformat_data_fork( + struct xfs_inode *ip, + struct xfs_dinode *dip) +{ + struct inode *inode = VFS_I(ip); + int error; + + /* + * Initialize the extent count early, as the per-format routines may + * depend on it. + */ + ip->i_df.if_format = dip->di_format; + ip->i_df.if_nextents = be32_to_cpu(dip->di_nextents); + + switch (inode->i_mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFSOCK: + ip->i_d.di_size = 0; + inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip)); + return 0; + case S_IFREG: + case S_IFLNK: + case S_IFDIR: + switch (ip->i_df.if_format) { + case XFS_DINODE_FMT_LOCAL: + error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, + be64_to_cpu(dip->di_size)); + if (!error) + error = xfs_ifork_verify_local_data(ip); + return error; + case XFS_DINODE_FMT_EXTENTS: + return xfs_iformat_extents(ip, dip, XFS_DATA_FORK); + case XFS_DINODE_FMT_BTREE: + return xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + default: + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, + dip, sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } + break; + default: + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, + sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } +} + +static uint16_t +xfs_dfork_attr_shortform_size( + struct xfs_dinode *dip) +{ + struct xfs_attr_shortform *atp = + (struct xfs_attr_shortform *)XFS_DFORK_APTR(dip); + + return be16_to_cpu(atp->hdr.totsize); +} + +int +xfs_iformat_attr_fork( 
+ struct xfs_inode *ip, + struct xfs_dinode *dip) +{ + int error = 0; + + /* + * Initialize the extent count early, as the per-format routines may + * depend on it. + */ + ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL); + ip->i_afp->if_format = dip->di_aformat; + if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */ + ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS; + ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents); + + switch (ip->i_afp->if_format) { + case XFS_DINODE_FMT_LOCAL: + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, + xfs_dfork_attr_shortform_size(dip)); + if (!error) + error = xfs_ifork_verify_local_attr(ip); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: + xfs_inode_verifier_error(ip, error, __func__, dip, + sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + break; + } + + if (error) { + kmem_cache_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + } + return error; +} + /* * Reallocate the space for if_broot based on the number of records * being added or deleted as indicated in rec_diff. Move the records @@ -504,38 +503,24 @@ xfs_idata_realloc( void xfs_idestroy_fork( - xfs_inode_t *ip, - int whichfork) + struct xfs_ifork *ifp) { - struct xfs_ifork *ifp; - - ifp = XFS_IFORK_PTR(ip, whichfork); if (ifp->if_broot != NULL) { kmem_free(ifp->if_broot); ifp->if_broot = NULL; } /* - * If the format is local, then we can't have an extents - * array so just look for an inline data array. If we're - * not local then we may or may not have an extents list, - * so check and free it up if we do. + * If the format is local, then we can't have an extents array so just + * look for an inline data array. If we're not local then we may or may + * not have an extents list, so check and free it up if we do. 
*/ - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - if (ifp->if_u1.if_data != NULL) { - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = NULL; - } - } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { - xfs_iext_destroy(ifp); - } - - if (whichfork == XFS_ATTR_FORK) { - kmem_cache_free(xfs_ifork_zone, ip->i_afp); - ip->i_afp = NULL; - } else if (whichfork == XFS_COW_FORK) { - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); - ip->i_cowfp = NULL; + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) { + kmem_free(ifp->if_u1.if_data); + ifp->if_u1.if_data = NULL; + } else if (ifp->if_flags & XFS_IFEXTENTS) { + if (ifp->if_height) + xfs_iext_destroy(ifp); } } @@ -592,7 +577,7 @@ void xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, - xfs_inode_log_item_t *iip, + struct xfs_inode_log_item *iip, int whichfork) { char *cp; @@ -618,7 +603,7 @@ xfs_iflush_fork( } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; - switch (XFS_IFORK_FORMAT(ip, whichfork)) { + switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { @@ -633,7 +618,7 @@ xfs_iflush_fork( !(iip->ili_fields & extflag[whichfork])); if ((iip->ili_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); + ASSERT(ifp->if_nextents > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); } @@ -688,51 +673,58 @@ xfs_ifork_init_cow( if (ip->i_cowfp) return; - ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone, - KM_NOFS); + ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_zone, + GFP_NOFS | __GFP_NOFAIL); ip->i_cowfp->if_flags = XFS_IFEXTENTS; - ip->i_cformat = XFS_DINODE_FMT_EXTENTS; - ip->i_cnextents = 0; + ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } -/* Default fork content verifiers. 
*/ -struct xfs_ifork_ops xfs_default_ifork_ops = { - .verify_attr = xfs_attr_shortform_verify, - .verify_dir = xfs_dir2_sf_verify, - .verify_symlink = xfs_symlink_shortform_verify, -}; - /* Verify the inline contents of the data fork of an inode. */ -xfs_failaddr_t -xfs_ifork_verify_data( - struct xfs_inode *ip, - struct xfs_ifork_ops *ops) +int +xfs_ifork_verify_local_data( + struct xfs_inode *ip) { - /* Non-local data fork, we're done. */ - if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) - return NULL; + xfs_failaddr_t fa = NULL; - /* Check the inline data fork if there is one. */ switch (VFS_I(ip)->i_mode & S_IFMT) { case S_IFDIR: - return ops->verify_dir(ip); + fa = xfs_dir2_sf_verify(ip); + break; case S_IFLNK: - return ops->verify_symlink(ip); + fa = xfs_symlink_shortform_verify(ip); + break; default: - return NULL; + break; } + + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", + ip->i_df.if_u1.if_data, ip->i_df.if_bytes, fa); + return -EFSCORRUPTED; + } + + return 0; } /* Verify the inline contents of the attr fork of an inode. */ -xfs_failaddr_t -xfs_ifork_verify_attr( - struct xfs_inode *ip, - struct xfs_ifork_ops *ops) +int +xfs_ifork_verify_local_attr( + struct xfs_inode *ip) { - /* There has to be an attr fork allocated if aformat is local. */ - if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) - return NULL; - if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK)) - return __this_address; - return ops->verify_attr(ip); + struct xfs_ifork *ifp = ip->i_afp; + xfs_failaddr_t fa; + + if (!ifp) + fa = __this_address; + else + fa = xfs_attr_shortform_verify(ip); + + if (fa) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", + ifp ? ifp->if_u1.if_data : NULL, + ifp ? 
ifp->if_bytes : 0, fa); + return -EFSCORRUPTED; + } + + return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 500333d0101e..a4953e95c4f3 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -23,6 +23,8 @@ struct xfs_ifork { } if_u1; short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ + int8_t if_format; /* format of this fork */ + xfs_extnum_t if_nextents; /* # of extents in this fork */ }; /* @@ -46,57 +48,45 @@ struct xfs_ifork { (ip)->i_afp : \ (ip)->i_cowfp)) #define XFS_IFORK_DSIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_IFORK_BOFF(ip) : \ - XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version)) + (XFS_IFORK_Q(ip) ? XFS_IFORK_BOFF(ip) : XFS_LITINO((ip)->i_mount)) #define XFS_IFORK_ASIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \ - XFS_IFORK_BOFF(ip) : \ - 0) + (XFS_IFORK_Q(ip) ? XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : 0) #define XFS_IFORK_SIZE(ip,w) \ ((w) == XFS_DATA_FORK ? \ XFS_IFORK_DSIZE(ip) : \ ((w) == XFS_ATTR_FORK ? \ XFS_IFORK_ASIZE(ip) : \ 0)) -#define XFS_IFORK_FORMAT(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - (ip)->i_d.di_format : \ - ((w) == XFS_ATTR_FORK ? \ - (ip)->i_d.di_aformat : \ - (ip)->i_cformat)) -#define XFS_IFORK_FMT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_format = (n)) : \ - ((w) == XFS_ATTR_FORK ? \ - ((ip)->i_d.di_aformat = (n)) : \ - ((ip)->i_cformat = (n)))) -#define XFS_IFORK_NEXTENTS(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - (ip)->i_d.di_nextents : \ - ((w) == XFS_ATTR_FORK ? \ - (ip)->i_d.di_anextents : \ - (ip)->i_cnextents)) -#define XFS_IFORK_NEXT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_nextents = (n)) : \ - ((w) == XFS_ATTR_FORK ? 
\ - ((ip)->i_d.di_anextents = (n)) : \ - ((ip)->i_cnextents = (n)))) #define XFS_IFORK_MAXEXT(ip, w) \ (XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t)) -#define xfs_ifork_has_extents(ip, w) \ - (XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_EXTENTS || \ - XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_BTREE) +static inline bool xfs_ifork_has_extents(struct xfs_ifork *ifp) +{ + return ifp->if_format == XFS_DINODE_FMT_EXTENTS || + ifp->if_format == XFS_DINODE_FMT_BTREE; +} + +static inline xfs_extnum_t xfs_ifork_nextents(struct xfs_ifork *ifp) +{ + if (!ifp) + return 0; + return ifp->if_nextents; +} + +static inline int8_t xfs_ifork_format(struct xfs_ifork *ifp) +{ + if (!ifp) + return XFS_DINODE_FMT_EXTENTS; + return ifp->if_format; +} struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); -int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); +int xfs_iformat_data_fork(struct xfs_inode *, struct xfs_dinode *); +int xfs_iformat_attr_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); -void xfs_idestroy_fork(struct xfs_inode *, int); +void xfs_idestroy_fork(struct xfs_ifork *ifp); void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff, int whichfork); void xfs_iroot_realloc(struct xfs_inode *, int, int); @@ -180,18 +170,7 @@ extern struct kmem_zone *xfs_ifork_zone; extern void xfs_ifork_init_cow(struct xfs_inode *ip); -typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *); - -struct xfs_ifork_ops { - xfs_ifork_verifier_t verify_symlink; - xfs_ifork_verifier_t verify_dir; - xfs_ifork_verifier_t verify_attr; -}; -extern struct xfs_ifork_ops xfs_default_ifork_ops; - -xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip, - struct xfs_ifork_ops *ops); -xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip, - struct xfs_ifork_ops *ops); +int xfs_ifork_verify_local_data(struct xfs_inode *ip); +int 
xfs_ifork_verify_local_attr(struct xfs_inode *ip); #endif /* __XFS_INODE_FORK_H__ */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 9bac0d2e56dc..e3400c9c71cd 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -424,12 +424,10 @@ struct xfs_log_dinode { /* structure must be padded to 64 bit alignment */ }; -static inline uint xfs_log_dinode_size(int version) -{ - if (version == 3) - return sizeof(struct xfs_log_dinode); - return offsetof(struct xfs_log_dinode, di_next_unlinked); -} +#define xfs_log_dinode_size(mp) \ + (xfs_sb_version_has_v3inode(&(mp)->m_sb) ? \ + sizeof(struct xfs_log_dinode) : \ + offsetof(struct xfs_log_dinode, di_next_unlinked)) /* * Buffer Log Format definitions diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 3bf671637a91..641132d0e39d 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -7,6 +7,73 @@ #define __XFS_LOG_RECOVER_H__ /* + * Each log item type (XFS_LI_*) gets its own xlog_recover_item_ops to + * define how recovery should work for that type of log item. + */ +struct xlog_recover_item; + +/* Sorting hat for log items as they're read in. */ +enum xlog_recover_reorder { + XLOG_REORDER_BUFFER_LIST, + XLOG_REORDER_ITEM_LIST, + XLOG_REORDER_INODE_BUFFER_LIST, + XLOG_REORDER_CANCEL_LIST, +}; + +struct xlog_recover_item_ops { + uint16_t item_type; /* XFS_LI_* type code. */ + + /* + * Help sort recovered log items into the order required to replay them + * correctly. Log item types that always use XLOG_REORDER_ITEM_LIST do + * not have to supply a function here. See the comment preceding + * xlog_recover_reorder_trans for more details about what the return + * values mean. + */ + enum xlog_recover_reorder (*reorder)(struct xlog_recover_item *item); + + /* Start readahead for pass2, if provided. 
*/ + void (*ra_pass2)(struct xlog *log, struct xlog_recover_item *item); + + /* Do whatever work we need to do for pass1, if provided. */ + int (*commit_pass1)(struct xlog *log, struct xlog_recover_item *item); + + /* + * This function should do whatever work is needed for pass2 of log + * recovery, if provided. + * + * If the recovered item is an intent item, this function should parse + * the recovered item to construct an in-core log intent item and + * insert it into the AIL. The in-core log intent item should have 1 + * refcount so that the item is freed either (a) when we commit the + * recovered log item for the intent-done item; (b) replay the work and + * log a new intent-done item; or (c) recovery fails and we have to + * abort. + * + * If the recovered item is an intent-done item, this function should + * parse the recovered item to find the id of the corresponding intent + * log item. Next, it should find the in-core log intent item in the + * AIL and release it. + */ + int (*commit_pass2)(struct xlog *log, struct list_head *buffer_list, + struct xlog_recover_item *item, xfs_lsn_t lsn); +}; + +extern const struct xlog_recover_item_ops xlog_icreate_item_ops; +extern const struct xlog_recover_item_ops xlog_buf_item_ops; +extern const struct xlog_recover_item_ops xlog_inode_item_ops; +extern const struct xlog_recover_item_ops xlog_dquot_item_ops; +extern const struct xlog_recover_item_ops xlog_quotaoff_item_ops; +extern const struct xlog_recover_item_ops xlog_bui_item_ops; +extern const struct xlog_recover_item_ops xlog_bud_item_ops; +extern const struct xlog_recover_item_ops xlog_efi_item_ops; +extern const struct xlog_recover_item_ops xlog_efd_item_ops; +extern const struct xlog_recover_item_ops xlog_rui_item_ops; +extern const struct xlog_recover_item_ops xlog_rud_item_ops; +extern const struct xlog_recover_item_ops xlog_cui_item_ops; +extern const struct xlog_recover_item_ops xlog_cud_item_ops; + +/* * Macros, structures, prototypes for internal log 
manager use. */ @@ -22,13 +89,13 @@ /* * item headers are in ri_buf[0]. Additional buffers follow. */ -typedef struct xlog_recover_item { +struct xlog_recover_item { struct list_head ri_list; - int ri_type; int ri_cnt; /* count of regions found */ int ri_total; /* total regions */ - xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ -} xlog_recover_item_t; + struct xfs_log_iovec *ri_buf; /* ptr to regions buffer */ + const struct xlog_recover_item_ops *ri_ops; +}; struct xlog_recover { struct hlist_node r_list; @@ -51,4 +118,12 @@ struct xlog_recover { #define XLOG_RECOVER_PASS1 1 #define XLOG_RECOVER_PASS2 2 +void xlog_buf_readahead(struct xlog *log, xfs_daddr_t blkno, uint len, + const struct xfs_buf_ops *ops); +bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len); +void xlog_recover_iodone(struct xfs_buf *bp); + +void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type, + uint64_t intent_id); + #endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index b2113b17e53c..076bdc7037ee 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -18,23 +18,22 @@ typedef uint64_t xfs_qcnt_t; typedef uint16_t xfs_qwarncnt_t; +typedef uint8_t xfs_dqtype_t; + +#define XFS_DQTYPE_STRINGS \ + { XFS_DQTYPE_USER, "USER" }, \ + { XFS_DQTYPE_PROJ, "PROJ" }, \ + { XFS_DQTYPE_GROUP, "GROUP" } + /* * flags for q_flags field in the dquot. 
*/ -#define XFS_DQ_USER 0x0001 /* a user quota */ -#define XFS_DQ_PROJ 0x0002 /* project quota */ -#define XFS_DQ_GROUP 0x0004 /* a group quota */ -#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ -#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ - -#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) +#define XFS_DQFLAG_DIRTY (1 << 0) /* dquot is dirty */ +#define XFS_DQFLAG_FREEING (1 << 1) /* dquot is being torn down */ -#define XFS_DQ_FLAGS \ - { XFS_DQ_USER, "USER" }, \ - { XFS_DQ_PROJ, "PROJ" }, \ - { XFS_DQ_GROUP, "GROUP" }, \ - { XFS_DQ_DIRTY, "DIRTY" }, \ - { XFS_DQ_FREEING, "FREEING" } +#define XFS_DQFLAG_STRINGS \ + { XFS_DQFLAG_DIRTY, "DIRTY" }, \ + { XFS_DQFLAG_FREEING, "FREEING" } /* * We have the possibility of all three quota types being active at once, and @@ -100,7 +99,6 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ #define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ -#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be @@ -138,11 +136,11 @@ typedef uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp, - struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type); + struct xfs_disk_dquot *ddq, xfs_dqid_t id); extern xfs_failaddr_t xfs_dqblk_verify(struct xfs_mount *mp, - struct xfs_dqblk *dqb, xfs_dqid_t id, uint type); + struct xfs_dqblk *dqb, xfs_dqid_t id); extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); extern void xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb, - xfs_dqid_t id, uint type); + xfs_dqid_t id, xfs_dqtype_t type); #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 6e1665f2cb67..2076627243b0 100644 --- 
a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,7 +46,7 @@ xfs_refcount_lookup_le( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -63,7 +63,7 @@ xfs_refcount_lookup_ge( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -80,7 +80,7 @@ xfs_refcount_lookup_eq( xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; @@ -108,7 +108,7 @@ xfs_refcount_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; xfs_agblock_t realstart; @@ -119,7 +119,7 @@ xfs_refcount_get_rec( xfs_refcount_btrec_to_irec(rec, irec); - agno = cur->bc_private.a.agno; + agno = cur->bc_ag.agno; if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; @@ -144,7 +144,7 @@ xfs_refcount_get_rec( if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) goto out_bad_rec; - trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.agno, irec); return 0; out_bad_rec: @@ -169,14 +169,14 @@ xfs_refcount_update( union xfs_btree_rec rec; int error; - trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.agno, irec); rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); rec.refc.rc_blockcount = 
cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -193,7 +193,7 @@ xfs_refcount_insert( { int error; - trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); + trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.agno, irec); cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; @@ -208,7 +208,7 @@ xfs_refcount_insert( out_error: if (error) trace_xfs_refcount_insert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -234,7 +234,7 @@ xfs_refcount_delete( error = -EFSCORRUPTED; goto out_error; } - trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); + trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.agno, &irec); error = xfs_btree_delete(cur, i); if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { error = -EFSCORRUPTED; @@ -246,7 +246,7 @@ xfs_refcount_delete( out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -366,7 +366,7 @@ xfs_refcount_split_extent( return 0; *shape_changed = true; - trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.agno, &rcext, agbno); /* Establish the right extent. 
*/ @@ -391,7 +391,7 @@ xfs_refcount_split_extent( out_error: trace_xfs_refcount_split_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -411,7 +411,7 @@ xfs_refcount_merge_center_extents( int found_rec; trace_xfs_refcount_merge_center_extents(cur->bc_mp, - cur->bc_private.a.agno, left, center, right); + cur->bc_ag.agno, left, center, right); /* * Make sure the center and right extents are not in the btree. @@ -468,7 +468,7 @@ xfs_refcount_merge_center_extents( out_error: trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -487,7 +487,7 @@ xfs_refcount_merge_left_extent( int found_rec; trace_xfs_refcount_merge_left_extent(cur->bc_mp, - cur->bc_private.a.agno, left, cleft); + cur->bc_ag.agno, left, cleft); /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { @@ -530,7 +530,7 @@ xfs_refcount_merge_left_extent( out_error: trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -548,7 +548,7 @@ xfs_refcount_merge_right_extent( int found_rec; trace_xfs_refcount_merge_right_extent(cur->bc_mp, - cur->bc_private.a.agno, cright, right); + cur->bc_ag.agno, cright, right); /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, @@ -594,7 +594,7 @@ xfs_refcount_merge_right_extent( out_error: trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -679,13 +679,13 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; } - trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.agno, left, cleft, agbno); return error; out_error: 
trace_xfs_refcount_find_left_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -768,13 +768,13 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = aglen; cright->rc_refcount = 1; } - trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.agno, cright, right, agbno + aglen); return error; out_error: trace_xfs_refcount_find_right_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -883,7 +883,7 @@ xfs_refcount_still_have_space( { unsigned long overhead; - overhead = cur->bc_private.a.priv.refc.shape_changes * + overhead = cur->bc_ag.refc.shape_changes * xfs_allocfree_log_count(cur->bc_mp, 1); overhead *= cur->bc_mp->m_sb.sb_blocksize; @@ -891,17 +891,17 @@ xfs_refcount_still_have_space( * Only allow 2 refcount extent updates per transaction if the * refcount continue update "error" has been injected. 
*/ - if (cur->bc_private.a.priv.refc.nr_ops > 2 && + if (cur->bc_ag.refc.nr_ops > 2 && XFS_TEST_ERROR(false, cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) return false; - if (cur->bc_private.a.priv.refc.nr_ops == 0) + if (cur->bc_ag.refc.nr_ops == 0) return true; else if (overhead > cur->bc_tp->t_log_res) return false; return cur->bc_tp->t_log_res - overhead > - cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; + cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* @@ -952,7 +952,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &tmp); + cur->bc_ag.agno, &tmp); /* * Either cover the hole (increment) or @@ -968,10 +968,10 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_private.a.agno, + cur->bc_ag.agno, tmp.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, tmp.rc_blockcount, oinfo); @@ -998,12 +998,12 @@ xfs_refcount_adjust_extents( goto skip; ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &ext); + cur->bc_ag.agno, &ext); if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) goto out_error; - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; } else if (ext.rc_refcount == 1) { error = xfs_refcount_delete(cur, &found_rec); if (error) @@ -1012,11 +1012,11 @@ xfs_refcount_adjust_extents( error = -EFSCORRUPTED; goto out_error; } - cur->bc_private.a.priv.refc.nr_ops++; + cur->bc_ag.refc.nr_ops++; goto advloop; } else { fsbno = XFS_AGB_TO_FSB(cur->bc_mp, - cur->bc_private.a.agno, + cur->bc_ag.agno, ext.rc_startblock); xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, oinfo); @@ -1035,7 +1035,7 @@ advloop: return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - 
cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1057,10 +1057,10 @@ xfs_refcount_adjust( *new_agbno = agbno; *new_aglen = aglen; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) - trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); else - trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); /* @@ -1088,7 +1088,7 @@ xfs_refcount_adjust( if (shape_changed) shape_changes++; if (shape_changes) - cur->bc_private.a.priv.refc.shape_changes++; + cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, @@ -1099,7 +1099,7 @@ xfs_refcount_adjust( return 0; out_error: - trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1115,7 +1115,7 @@ xfs_refcount_finish_one_cleanup( if (rcur == NULL) return; - agbp = rcur->bc_private.a.agbp; + agbp = rcur->bc_ag.agbp; xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); @@ -1165,9 +1165,9 @@ xfs_refcount_finish_one( * the startblock, get one now. 
*/ rcur = *pcur; - if (rcur != NULL && rcur->bc_private.a.agno != agno) { - nr_ops = rcur->bc_private.a.priv.refc.nr_ops; - shape_changes = rcur->bc_private.a.priv.refc.shape_changes; + if (rcur != NULL && rcur->bc_ag.agno != agno) { + nr_ops = rcur->bc_ag.refc.nr_ops; + shape_changes = rcur->bc_ag.refc.shape_changes; xfs_refcount_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -1183,8 +1183,8 @@ xfs_refcount_finish_one( error = -ENOMEM; goto out_cur; } - rcur->bc_private.a.priv.refc.nr_ops = nr_ops; - rcur->bc_private.a.priv.refc.shape_changes = shape_changes; + rcur->bc_ag.refc.nr_ops = nr_ops; + rcur->bc_ag.refc.shape_changes = shape_changes; } *pcur = rcur; @@ -1303,7 +1303,7 @@ xfs_refcount_find_shared( int have; int error; - trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.agno, agbno, aglen); /* By default, skip the whole range */ @@ -1383,12 +1383,12 @@ xfs_refcount_find_shared( done: trace_xfs_refcount_find_shared_result(cur->bc_mp, - cur->bc_private.a.agno, *fbno, *flen); + cur->bc_ag.agno, *fbno, *flen); out_error: if (error) trace_xfs_refcount_find_shared_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1485,7 +1485,7 @@ xfs_refcount_adjust_cow_extents( tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &tmp); + cur->bc_ag.agno, &tmp); error = xfs_refcount_insert(cur, &tmp, &found_tmp); @@ -1513,7 +1513,7 @@ xfs_refcount_adjust_cow_extents( ext.rc_refcount = 0; trace_xfs_refcount_modify_extent(cur->bc_mp, - cur->bc_private.a.agno, &ext); + cur->bc_ag.agno, &ext); error = xfs_refcount_delete(cur, &found_rec); if (error) goto out_error; @@ -1529,7 +1529,7 @@ xfs_refcount_adjust_cow_extents( return error; out_error: trace_xfs_refcount_modify_extent_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, 
error, _RET_IP_); return error; } @@ -1575,7 +1575,7 @@ xfs_refcount_adjust_cow( return 0; out_error: - trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1589,7 +1589,7 @@ __xfs_refcount_cow_alloc( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, + trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.agno, agbno, aglen); /* Add refcount btree reservation */ @@ -1606,7 +1606,7 @@ __xfs_refcount_cow_free( xfs_agblock_t agbno, xfs_extlen_t aglen) { - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, + trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.agno, agbno, aglen); /* Remove refcount btree reservation */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 38529dbacd55..a6ac60ae9421 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -12,6 +12,7 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -25,7 +26,7 @@ xfs_refcountbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_ag.agbp, cur->bc_ag.agno); } STATIC void @@ -34,17 +35,15 @@ xfs_refcountbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); - struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; + struct xfs_perag *pag = agbp->b_pag; ASSERT(ptr->s != 0); agf->agf_refcount_root = ptr->s; be32_add_cpu(&agf->agf_refcount_level, inc); 
pag->pagf_refcount_level += inc; - xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL); @@ -57,8 +56,8 @@ xfs_refcountbt_alloc_block( union xfs_btree_ptr *new, int *stat) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; struct xfs_alloc_arg args; /* block allocation args */ int error; /* error return value */ @@ -66,7 +65,7 @@ xfs_refcountbt_alloc_block( args.tp = cur->bc_tp; args.mp = cur->bc_mp; args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno, + args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, xfs_refc_block(args.mp)); args.oinfo = XFS_RMAP_OINFO_REFC; args.minlen = args.maxlen = args.prod = 1; @@ -75,13 +74,13 @@ xfs_refcountbt_alloc_block( error = xfs_alloc_vextent(&args); if (error) goto out_error; - trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; } - ASSERT(args.agno == cur->bc_private.a.agno); + ASSERT(args.agno == cur->bc_ag.agno); ASSERT(args.len == 1); new->s = cpu_to_be32(args.agbno); @@ -101,12 +100,12 @@ xfs_refcountbt_free_block( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); int error; - trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); @@ -169,9 +168,9 @@ xfs_refcountbt_init_ptr_from_cur( 
struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_refcount_root; } @@ -311,42 +310,91 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = { }; /* - * Allocate a new refcount btree cursor. + * Initialize a new refcount btree cursor. */ -struct xfs_btree_cur * -xfs_refcountbt_init_cursor( +static struct xfs_btree_cur * +xfs_refcountbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_buf *agbp, xfs_agnumber_t agno) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; ASSERT(agno != NULLAGNUMBER); ASSERT(agno < mp->m_sb.sb_agcount); - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = XFS_BTNUM_REFC; cur->bc_blocklog = mp->m_sb.sb_blocklog; - cur->bc_ops = &xfs_refcountbt_ops; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); - cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); - - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; + cur->bc_ag.agno = agno; cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_private.a.priv.refc.nr_ops = 0; - cur->bc_private.a.priv.refc.shape_changes = 0; + cur->bc_ag.refc.nr_ops = 0; + cur->bc_ag.refc.shape_changes = 0; + cur->bc_ops = &xfs_refcountbt_ops; + return cur; +} + +/* Create a btree cursor. 
*/ +struct xfs_btree_cur * +xfs_refcountbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + cur = xfs_refcountbt_init_common(mp, tp, agno); + cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); + cur->bc_ag.agbp = agbp; + return cur; +} + +/* Create a btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_refcountbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno) +{ + struct xfs_btree_cur *cur; + + cur = xfs_refcountbt_init_common(mp, NULL, agno); + xfs_btree_stage_afakeroot(cur, afake); return cur; } /* + * Swap in the new btree root. Once we pass this point the newly rebuilt btree + * is in place and we have to kill off all the old btree blocks. + */ +void +xfs_refcountbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_refcount_root = cpu_to_be32(afake->af_root); + agf->agf_refcount_level = cpu_to_be32(afake->af_levels); + agf->agf_refcount_blocks = cpu_to_be32(afake->af_blocks); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS | + XFS_AGF_REFCOUNT_ROOT | + XFS_AGF_REFCOUNT_LEVEL); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops); +} + +/* * Calculate the number of records in a refcount btree block. 
*/ int @@ -420,7 +468,7 @@ xfs_refcountbt_calc_reserves( if (error) return error; - agf = XFS_BUF_TO_AGF(agbp); + agf = agbp->b_addr; agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_trans_brelse(tp, agbp); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index ba416f71c824..69dc515db671 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -13,6 +13,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* * Btree block header size @@ -46,6 +47,8 @@ struct xfs_mount; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno); +struct xfs_btree_cur *xfs_refcountbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno); extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); @@ -58,4 +61,7 @@ extern int xfs_refcountbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); +void xfs_refcountbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); + #endif /* __XFS_REFCOUNT_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index ff9412f113c4..27c39268c31f 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -79,7 +79,7 @@ xfs_rmap_update( union xfs_btree_rec rec; int error; - trace_xfs_rmap_update(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); @@ -91,7 +91,7 @@ xfs_rmap_update( error = xfs_btree_update(cur, &rec); if (error) trace_xfs_rmap_update_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return 
error; } @@ -107,7 +107,7 @@ xfs_rmap_insert( int i; int error; - trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_private.a.agno, agbno, + trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -133,7 +133,7 @@ xfs_rmap_insert( done: if (error) trace_xfs_rmap_insert_error(rcur->bc_mp, - rcur->bc_private.a.agno, error, _RET_IP_); + rcur->bc_ag.agno, error, _RET_IP_); return error; } @@ -149,7 +149,7 @@ xfs_rmap_delete( int i; int error; - trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_private.a.agno, agbno, + trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.agno, agbno, len, owner, offset, flags); error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i); @@ -170,7 +170,7 @@ xfs_rmap_delete( done: if (error) trace_xfs_rmap_delete_error(rcur->bc_mp, - rcur->bc_private.a.agno, error, _RET_IP_); + rcur->bc_ag.agno, error, _RET_IP_); return error; } @@ -197,7 +197,7 @@ xfs_rmap_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_private.a.agno; + xfs_agnumber_t agno = cur->bc_ag.agno; union xfs_btree_rec *rec; int error; @@ -260,7 +260,7 @@ xfs_rmap_find_left_neighbor_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp, - cur->bc_private.a.agno, rec->rm_startblock, + cur->bc_ag.agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -312,7 +312,7 @@ xfs_rmap_find_left_neighbor( info.stat = stat; trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp, - cur->bc_private.a.agno, bno, 0, owner, offset, flags); + cur->bc_ag.agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_find_left_neighbor_helper, &info); @@ -320,7 +320,7 @@ xfs_rmap_find_left_neighbor( error = 0; if (*stat) trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, irec->rm_startblock, + 
cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -336,7 +336,7 @@ xfs_rmap_lookup_le_range_helper( struct xfs_find_left_neighbor_info *info = priv; trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp, - cur->bc_private.a.agno, rec->rm_startblock, + cur->bc_ag.agno, rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, rec->rm_offset, rec->rm_flags); @@ -385,14 +385,14 @@ xfs_rmap_lookup_le_range( info.stat = stat; trace_xfs_rmap_lookup_le_range(cur->bc_mp, - cur->bc_private.a.agno, bno, 0, owner, offset, flags); + cur->bc_ag.agno, bno, 0, owner, offset, flags); error = xfs_rmap_query_range(cur, &info.high, &info.high, xfs_rmap_lookup_le_range_helper, &info); if (error == -ECANCELED) error = 0; if (*stat) trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, irec->rm_startblock, + cur->bc_ag.agno, irec->rm_startblock, irec->rm_blockcount, irec->rm_owner, irec->rm_offset, irec->rm_flags); return error; @@ -498,7 +498,7 @@ xfs_rmap_unmap( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -522,7 +522,7 @@ xfs_rmap_unmap( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, ltrec.rm_startblock, + cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); ltoff = ltrec.rm_offset; @@ -588,7 +588,7 @@ xfs_rmap_unmap( if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -666,7 +666,7 @@ xfs_rmap_unmap( else cur->bc_rec.r.rm_offset = offset + len; 
cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, cur->bc_rec.r.rm_startblock, cur->bc_rec.r.rm_blockcount, cur->bc_rec.r.rm_owner, @@ -678,11 +678,11 @@ xfs_rmap_unmap( } out_done: - trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_unmap_error(mp, cur->bc_private.a.agno, + trace_xfs_rmap_unmap_error(mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -773,7 +773,7 @@ xfs_rmap_map( (flags & XFS_RMAP_BMBT_BLOCK); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); @@ -795,7 +795,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, ltrec.rm_startblock, + cur->bc_ag.agno, ltrec.rm_startblock, ltrec.rm_blockcount, ltrec.rm_owner, ltrec.rm_offset, ltrec.rm_flags); @@ -831,7 +831,7 @@ xfs_rmap_map( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, gtrec.rm_startblock, + cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); if (!xfs_rmap_is_mergeable(&gtrec, owner, flags)) @@ -870,7 +870,7 @@ xfs_rmap_map( * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr| */ ltrec.rm_blockcount += gtrec.rm_blockcount; - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, @@ -921,7 +921,7 @@ xfs_rmap_map( cur->bc_rec.r.rm_owner = owner; cur->bc_rec.r.rm_offset = offset; cur->bc_rec.r.rm_flags = flags; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, flags); error =
xfs_btree_insert(cur, &i); if (error) @@ -932,11 +932,11 @@ xfs_rmap_map( } } - trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) - trace_xfs_rmap_map_error(mp, cur->bc_private.a.agno, + trace_xfs_rmap_map_error(mp, cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1010,7 +1010,7 @@ xfs_rmap_convert( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -1034,7 +1034,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_lookup_le_range_result(cur->bc_mp, - cur->bc_private.a.agno, PREV.rm_startblock, + cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1076,7 +1076,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, LEFT.rm_startblock, + cur->bc_ag.agno, LEFT.rm_startblock, LEFT.rm_blockcount, LEFT.rm_owner, LEFT.rm_offset, LEFT.rm_flags); if (LEFT.rm_startblock + LEFT.rm_blockcount == bno && @@ -1114,7 +1114,7 @@ xfs_rmap_convert( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, RIGHT.rm_startblock, + cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (bno + len == RIGHT.rm_startblock && @@ -1132,7 +1132,7 @@ xfs_rmap_convert( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, _RET_IP_); /* reset the cursor back to PREV */ @@ -1162,7 +1162,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, 
cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1180,7 +1180,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1210,7 +1210,7 @@ xfs_rmap_convert( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. */ - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, PREV.rm_startblock, PREV.rm_blockcount, PREV.rm_owner, PREV.rm_offset, PREV.rm_flags); @@ -1247,7 +1247,7 @@ xfs_rmap_convert( error = -EFSCORRUPTED; goto done; } - trace_xfs_rmap_delete(mp, cur->bc_private.a.agno, + trace_xfs_rmap_delete(mp, cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); @@ -1326,7 +1326,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1383,7 +1383,7 @@ xfs_rmap_convert( NEW.rm_blockcount = len; NEW.rm_flags = newext; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1414,7 +1414,7 @@ xfs_rmap_convert( NEW = PREV; NEW.rm_blockcount = offset - PREV.rm_offset; cur->bc_rec.r = NEW; - trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, NEW.rm_startblock, NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset, NEW.rm_flags); @@ -1441,7 +1441,7 @@ xfs_rmap_convert( /* new middle extent - newext */ cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN; cur->bc_rec.r.rm_flags |= newext; 
- trace_xfs_rmap_insert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_insert(mp, cur->bc_ag.agno, bno, len, owner, offset, newext); error = xfs_btree_insert(cur, &i); if (error) @@ -1465,12 +1465,12 @@ xfs_rmap_convert( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1506,7 +1506,7 @@ xfs_rmap_convert_shared( (flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))); oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0; new_endoff = offset + len; - trace_xfs_rmap_convert(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -1573,7 +1573,7 @@ xfs_rmap_convert_shared( goto done; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, RIGHT.rm_startblock, + cur->bc_ag.agno, RIGHT.rm_startblock, RIGHT.rm_blockcount, RIGHT.rm_owner, RIGHT.rm_offset, RIGHT.rm_flags); if (xfs_rmap_is_mergeable(&RIGHT, owner, newext)) @@ -1589,7 +1589,7 @@ xfs_rmap_convert_shared( RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX) state &= ~RMAP_RIGHT_CONTIG; - trace_xfs_rmap_convert_state(mp, cur->bc_private.a.agno, state, + trace_xfs_rmap_convert_state(mp, cur->bc_ag.agno, state, _RET_IP_); /* * Switch out based on the FILLING and CONTIG state bits. 
@@ -1880,12 +1880,12 @@ xfs_rmap_convert_shared( ASSERT(0); } - trace_xfs_rmap_convert_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_convert_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); done: if (error) trace_xfs_rmap_convert_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -1923,7 +1923,7 @@ xfs_rmap_unmap_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_unmap(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* @@ -2072,12 +2072,12 @@ xfs_rmap_unmap_shared( goto out_error; } - trace_xfs_rmap_unmap_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_unmap_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_unmap_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -2112,7 +2112,7 @@ xfs_rmap_map_shared( xfs_owner_info_unpack(oinfo, &owner, &offset, &flags); if (unwritten) flags |= XFS_RMAP_UNWRITTEN; - trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); /* Is there a left record that abuts our range? 
*/ @@ -2138,7 +2138,7 @@ xfs_rmap_map_shared( goto out_error; } trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp, - cur->bc_private.a.agno, gtrec.rm_startblock, + cur->bc_ag.agno, gtrec.rm_startblock, gtrec.rm_blockcount, gtrec.rm_owner, gtrec.rm_offset, gtrec.rm_flags); @@ -2231,12 +2231,12 @@ xfs_rmap_map_shared( goto out_error; } - trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len, + trace_xfs_rmap_map_done(mp, cur->bc_ag.agno, bno, len, unwritten, oinfo); out_error: if (error) trace_xfs_rmap_map_error(cur->bc_mp, - cur->bc_private.a.agno, error, _RET_IP_); + cur->bc_ag.agno, error, _RET_IP_); return error; } @@ -2336,7 +2336,7 @@ xfs_rmap_finish_one_cleanup( if (rcur == NULL) return; - agbp = rcur->bc_private.a.agbp; + agbp = rcur->bc_ag.agbp; xfs_btree_del_cursor(rcur, error); if (error) xfs_trans_brelse(tp, agbp); @@ -2386,7 +2386,7 @@ xfs_rmap_finish_one( * the startblock, get one now. */ rcur = *pcur; - if (rcur != NULL && rcur->bc_private.a.agno != agno) { + if (rcur != NULL && rcur->bc_ag.agno != agno) { xfs_rmap_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -2694,7 +2694,6 @@ struct xfs_rmap_key_state { uint64_t owner; uint64_t offset; unsigned int flags; - bool has_rmap; }; /* For each rmap given, figure out if it doesn't match the key we want. 
*/ @@ -2709,7 +2708,6 @@ xfs_rmap_has_other_keys_helper( if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset && ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags) return 0; - rks->has_rmap = true; return -ECANCELED; } @@ -2731,7 +2729,7 @@ xfs_rmap_has_other_keys( int error; xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags); - rks.has_rmap = false; + *has_rmap = false; low.rm_startblock = bno; memset(&high, 0xFF, sizeof(high)); @@ -2739,11 +2737,12 @@ xfs_rmap_has_other_keys( error = xfs_rmap_query_range(cur, &low, &high, xfs_rmap_has_other_keys_helper, &rks); - if (error < 0) - return error; + if (error == -ECANCELED) { + *has_rmap = true; + return 0; + } - *has_rmap = rks.has_rmap; - return 0; + return error; } const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = { diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index fc78efa52c94..beb81c84a937 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -14,6 +14,7 @@ #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" @@ -51,7 +52,7 @@ xfs_rmapbt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno); + cur->bc_ag.agbp, cur->bc_ag.agno); } STATIC void @@ -60,18 +61,16 @@ xfs_rmapbt_set_root( union xfs_btree_ptr *ptr, int inc) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); - xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; int btnum = cur->bc_btnum; - struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno); + struct xfs_perag *pag = agbp->b_pag; ASSERT(ptr->s != 0); agf->agf_roots[btnum] = ptr->s; be32_add_cpu(&agf->agf_levels[btnum], inc); pag->pagf_levels[btnum] += inc; - 
xfs_perag_put(pag); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -83,25 +82,25 @@ xfs_rmapbt_alloc_block( union xfs_btree_ptr *new, int *stat) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; int error; xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; - trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, bno, 1); if (bno == NULLAGBLOCK) { *stat = 0; return 0; } - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false); xfs_trans_agbtree_delta(cur->bc_tp, 1); @@ -109,7 +108,7 @@ xfs_rmapbt_alloc_block( be32_add_cpu(&agf->agf_rmap_blocks, 1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno); + xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_ag.agno); *stat = 1; return 0; @@ -120,13 +119,14 @@ xfs_rmapbt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - struct xfs_buf *agbp = cur->bc_private.a.agbp; - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; + struct xfs_perag *pag; xfs_agblock_t bno; int error; bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); - trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, + trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_ag.agno, bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); @@ -138,8 +138,8 @@ xfs_rmapbt_free_block( XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); - 
xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno); - + pag = cur->bc_ag.agbp->b_pag; + xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); return 0; } @@ -215,9 +215,9 @@ xfs_rmapbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); ptr->s = agf->agf_roots[cur->bc_btnum]; } @@ -448,37 +448,83 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = { .recs_inorder = xfs_rmapbt_recs_inorder, }; -/* - * Allocate a new allocation btree cursor. - */ -struct xfs_btree_cur * -xfs_rmapbt_init_cursor( +static struct xfs_btree_cur * +xfs_rmapbt_init_common( struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_buf *agbp, xfs_agnumber_t agno) { - struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); struct xfs_btree_cur *cur; - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); + cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; /* Overlapping btree; 2 keys per pointer. */ cur->bc_btnum = XFS_BTNUM_RMAP; cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; cur->bc_blocklog = mp->m_sb.sb_blocklog; + cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); + cur->bc_ag.agno = agno; cur->bc_ops = &xfs_rmapbt_ops; + + return cur; +} + +/* Create a new reverse mapping btree cursor. 
*/ +struct xfs_btree_cur * +xfs_rmapbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xfs_btree_cur *cur; + + cur = xfs_rmapbt_init_common(mp, tp, agno); cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); + cur->bc_ag.agbp = agbp; + return cur; +} - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; +/* Create a new reverse mapping btree cursor with a fake root for staging. */ +struct xfs_btree_cur * +xfs_rmapbt_stage_cursor( + struct xfs_mount *mp, + struct xbtree_afakeroot *afake, + xfs_agnumber_t agno) +{ + struct xfs_btree_cur *cur; + cur = xfs_rmapbt_init_common(mp, NULL, agno); + xfs_btree_stage_afakeroot(cur, afake); return cur; } /* + * Install a new reverse mapping btree root. Caller is responsible for + * invalidating and freeing the old btree blocks. + */ +void +xfs_rmapbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + struct xfs_buf *agbp) +{ + struct xfs_agf *agf = agbp->b_addr; + struct xbtree_afakeroot *afake = cur->bc_ag.afake; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + + agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); + agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); + agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS | + XFS_AGF_RMAP_BLOCKS); + xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); +} + +/* * Calculate number of records in an rmap btree block. 
*/ int @@ -569,7 +615,7 @@ xfs_rmapbt_calc_reserves( if (error) return error; - agf = XFS_BUF_TO_AGF(agbp); + agf = agbp->b_addr; agblocks = be32_to_cpu(agf->agf_length); tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_trans_brelse(tp, agbp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 820d668b063d..115c3455a734 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -9,6 +9,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; +struct xbtree_afakeroot; /* rmaps only exist on crc enabled filesystems */ #define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN @@ -43,6 +44,10 @@ struct xfs_mount; struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, xfs_agnumber_t agno); +struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp, + struct xbtree_afakeroot *afake, xfs_agnumber_t agno); +void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, struct xfs_buf *agbp); int xfs_rmapbt_maxrecs(int blocklen, int leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index f42c74cb8be5..1d9fa8a300f1 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -66,11 +66,11 @@ xfs_rtbuf_get( ip = issum ? 
mp->m_rsumip : mp->m_rbmip; - error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0); if (error) return error; - if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_real_extent(&map))) + if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) return -EFSCORRUPTED; ASSERT(map.br_startblock != NULLFSBLOCK); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2f60fc3c99a0..ae9aaf1f34bf 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -220,7 +220,7 @@ xfs_validate_sb_common( struct xfs_buf *bp, struct xfs_sb *sbp) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; @@ -243,7 +243,7 @@ xfs_validate_sb_common( } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, -"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); +"Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); return -EFSCORRUPTED; } @@ -328,6 +328,38 @@ xfs_validate_sb_common( return -EFSCORRUPTED; } + /* Validate the realtime geometry; stolen from xfs_repair */ + if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || + sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { + xfs_notice(mp, + "realtime extent sanity check failed"); + return -EFSCORRUPTED; + } + + if (sbp->sb_rblocks == 0) { + if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || + sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { + xfs_notice(mp, + "realtime zeroed geometry check failed"); + return -EFSCORRUPTED; + } + } else { + uint64_t rexts; + uint64_t rbmblocks; + + rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); + rbmblocks = howmany_64(sbp->sb_rextents, + NBBY * sbp->sb_blocksize); + + if (sbp->sb_rextents != rexts || + sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) || + sbp->sb_rbmblocks != rbmblocks) { + xfs_notice(mp, + "realtime 
geometry sanity check failed"); + return -EFSCORRUPTED; + } + } + if (sbp->sb_unit) { if (!xfs_sb_version_hasdalign(sbp) || sbp->sb_unit > sbp->sb_width || @@ -568,7 +600,7 @@ xfs_sb_quota_to_disk( * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it - * it beyond the "end" of the valid non-pquotino superblock. + * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); @@ -681,7 +713,7 @@ xfs_sb_read_verify( { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; int error; /* @@ -707,7 +739,7 @@ xfs_sb_read_verify( * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -730,7 +762,7 @@ static void xfs_sb_quiet_read_verify( struct xfs_buf *bp) { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + struct xfs_dsb *dsb = bp->b_addr; if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ @@ -748,13 +780,14 @@ xfs_sb_write_verify( struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; + struct xfs_dsb *dsb = bp->b_addr; int error; /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. 
*/ - __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); + __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; @@ -766,7 +799,7 @@ xfs_sb_write_verify( return; if (bip) - XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); return; @@ -927,7 +960,7 @@ xfs_log_sb( mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); } @@ -1007,7 +1040,7 @@ xfs_update_secondary_sbs( bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_buf_delwri_queue(bp, &buffer_list); xfs_buf_relse(bp); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c45acbd3add9..708feb8eac76 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -65,6 +65,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ +#define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ /* * LOWMODE is used by the allocator to activate the lowspace algorithm - when * free space is running low the extent allocator may choose to allocate an diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 3b8260ca7d1b..594bc447a7dd 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -204,16 +204,12 @@ xfs_failaddr_t xfs_symlink_shortform_verify( struct xfs_inode 
*ip) { - char *sfp; - char *endp; - struct xfs_ifork *ifp; - int size; - - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - sfp = (char *)ifp->if_u1.if_data; - size = ifp->if_bytes; - endp = sfp + size; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + char *sfp = (char *)ifp->if_u1.if_data; + int size = ifp->if_bytes; + char *endp = sfp + size; + + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); /* * Zero length symlinks should never occur in memory as they are diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 2b8ccb5b975d..b7e222befb08 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -8,6 +8,8 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" @@ -27,7 +29,7 @@ xfs_trans_ijoin( struct xfs_inode *ip, uint lock_flags) { - xfs_inode_log_item_t *iip; + struct xfs_inode_log_item *iip; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (ip->i_itemp == NULL) @@ -36,6 +38,7 @@ xfs_trans_ijoin( ASSERT(iip->ili_lock_flags == 0); iip->ili_lock_flags = lock_flags; + ASSERT(!xfs_iflags_test(ip, XFS_ISTALE)); /* * Get a log_item_desc to point at the new item. @@ -71,24 +74,35 @@ xfs_trans_ichgtime( } /* - * This is called to mark the fields indicated in fieldmask as needing - * to be logged when the transaction is committed. The inode must - * already be associated with the given transaction. + * This is called to mark the fields indicated in fieldmask as needing to be + * logged when the transaction is committed. The inode must already be + * associated with the given transaction. * - * The values for fieldmask are defined in xfs_inode_item.h. We always - * log all of the core inode if any of it has changed, and we always log - * all of the inline data/extents/b-tree root if any of them has changed. 
+ * The values for fieldmask are defined in xfs_inode_item.h. We always log all + * of the core inode if any of it has changed, and we always log all of the + * inline data/extents/b-tree root if any of them has changed. + * + * Grab and pin the cluster buffer associated with this inode to avoid RMW + * cycles at inode writeback time. Avoid the need to add error handling to every + * xfs_trans_log_inode() call by shutting down on read error. This will cause + * transactions to fail and everything to error out, just like if we return a + * read error in a dirty transaction and cancel it. */ void xfs_trans_log_inode( - xfs_trans_t *tp, - xfs_inode_t *ip, - uint flags) + struct xfs_trans *tp, + struct xfs_inode *ip, + uint flags) { - struct inode *inode = VFS_I(ip); + struct xfs_inode_log_item *iip = ip->i_itemp; + struct inode *inode = VFS_I(ip); + uint iversion_flags = 0; - ASSERT(ip->i_itemp != NULL); + ASSERT(iip); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(!xfs_iflags_test(ip, XFS_ISTALE)); + + tp->t_flags |= XFS_TRANS_DIRTY; /* * Don't bother with i_lock for the I_DIRTY_TIME check here, as races @@ -96,22 +110,13 @@ xfs_trans_log_inode( * to log the timestamps, or will clear already cleared fields in the * worst case. */ - if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) { + if (inode->i_state & I_DIRTY_TIME) { spin_lock(&inode->i_lock); - inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + inode->i_state &= ~I_DIRTY_TIME; spin_unlock(&inode->i_lock); } /* - * Record the specific change for fdatasync optimisation. This - * allows fdatasync to skip log forces for inodes that are only - * timestamp dirty. We do this before the change count so that - * the core being logged in this case does not impact on fdatasync - * behaviour. - */ - ip->i_itemp->ili_fsync_fields |= flags; - - /* * First time we log the inode in a transaction, bump the inode change * counter if it is configured for this to occur. 
While we have the * inode locked exclusively for metadata modification, we can usually @@ -120,23 +125,64 @@ xfs_trans_log_inode( * set however, then go ahead and bump the i_version counter * unconditionally. */ - if (!test_and_set_bit(XFS_LI_DIRTY, &ip->i_itemp->ili_item.li_flags) && - IS_I_VERSION(VFS_I(ip))) { - if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE)) - flags |= XFS_ILOG_CORE; + if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) { + if (IS_I_VERSION(inode) && + inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE)) + iversion_flags = XFS_ILOG_CORE; } - tp->t_flags |= XFS_TRANS_DIRTY; + /* + * Record the specific change for fdatasync optimisation. This allows + * fdatasync to skip log forces for inodes that are only timestamp + * dirty. + */ + spin_lock(&iip->ili_lock); + iip->ili_fsync_fields |= flags; + + if (!iip->ili_item.li_buf) { + struct xfs_buf *bp; + int error; + + /* + * We hold the ILOCK here, so this inode is not going to be + * flushed while we are here. Further, because there is no + * buffer attached to the item, we know that there is no IO in + * progress, so nothing will clear the ili_fields while we read + * in the buffer. Hence we can safely drop the spin lock and + * read the buffer knowing that the state will not change from + * here. + */ + spin_unlock(&iip->ili_lock); + error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, NULL, + &bp, 0); + if (error) { + xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR); + return; + } + + /* + * We need an explicit buffer reference for the log item but + * don't want the buffer to remain attached to the transaction. + * Hold the buffer but release the transaction reference once + * we've attached the inode log item to the buffer log item + * list. 
+ */ + xfs_buf_hold(bp); + spin_lock(&iip->ili_lock); + iip->ili_item.li_buf = bp; + bp->b_flags |= _XBF_INODES; + list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list); + xfs_trans_brelse(tp, bp); + } /* - * Always OR in the bits from the ili_last_fields field. - * This is to coordinate with the xfs_iflush() and xfs_iflush_done() - * routines in the eventual clearing of the ili_fields bits. - * See the big comment in xfs_iflush() for an explanation of - * this coordination mechanism. + * Always OR in the bits from the ili_last_fields field. This is to + * coordinate with the xfs_iflush() and xfs_iflush_done() routines in + * the eventual clearing of the ili_fields bits. See the big comment in + * xfs_iflush() for an explanation of this coordination mechanism. */ - flags |= ip->i_itemp->ili_last_fields; - ip->i_itemp->ili_fields |= flags; + iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags); + spin_unlock(&iip->ili_lock); } int diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 7a9c04920505..d1a0848cb52e 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -187,7 +187,7 @@ xfs_calc_inode_chunk_res( XFS_FSB_TO_B(mp, 1)); if (alloc) { /* icreate tx uses ordered buffers */ - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_sb_version_has_v3inode(&mp->m_sb)) return res; size = XFS_FSB_TO_B(mp, 1); } diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 88221c7a04cc..c6df01a2a158 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -57,7 +57,7 @@ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ (M_IGEO(mp)->ialloc_blks + \ - (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ + ((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \ (M_IGEO(mp)->inobt_maxlevels - 1))) /* |