diff options
Diffstat (limited to 'fs/xfs')
60 files changed, 1770 insertions, 1452 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 7e80732cb547..5f0494702e0b 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -46,7 +46,7 @@ xfs_perag_get( struct xfs_perag *pag; rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); + pag = xa_load(&mp->m_perags, agno); if (pag) { trace_xfs_perag_get(pag, _RET_IP_); ASSERT(atomic_read(&pag->pag_ref) >= 0); @@ -56,31 +56,6 @@ xfs_perag_get( return pag; } -/* - * search from @first to find the next perag with the given tag set. - */ -struct xfs_perag * -xfs_perag_get_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - unsigned int tag) -{ - struct xfs_perag *pag; - int found; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - trace_xfs_perag_get_tag(pag, _RET_IP_); - atomic_inc(&pag->pag_ref); - rcu_read_unlock(); - return pag; -} - /* Get a passive reference to the given perag. */ struct xfs_perag * xfs_perag_hold( @@ -117,7 +92,7 @@ xfs_perag_grab( struct xfs_perag *pag; rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); + pag = xa_load(&mp->m_perags, agno); if (pag) { trace_xfs_perag_grab(pag, _RET_IP_); if (!atomic_inc_not_zero(&pag->pag_active_ref)) @@ -127,32 +102,6 @@ xfs_perag_grab( return pag; } -/* - * search from @first to find the next perag with the given tag set. - */ -struct xfs_perag * -xfs_perag_grab_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - int tag) -{ - struct xfs_perag *pag; - int found; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - trace_xfs_perag_grab_tag(pag, _RET_IP_); - if (!atomic_inc_not_zero(&pag->pag_active_ref)) - pag = NULL; - rcu_read_unlock(); - return pag; -} - void xfs_perag_rele( struct xfs_perag *pag) @@ -235,16 +184,6 @@ out: return error; } -STATIC void -__xfs_free_perag( - struct rcu_head *head) -{ - struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); - - ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); - kfree(pag); -} - /* * Free up the per-ag resources associated with the mount structure. */ @@ -256,9 +195,7 @@ xfs_free_perag( xfs_agnumber_t agno; for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - spin_lock(&mp->m_perag_lock); - pag = radix_tree_delete(&mp->m_perag_tree, agno); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, agno); ASSERT(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); xfs_defer_drain_free(&pag->pag_intents_drain); @@ -270,7 +207,7 @@ xfs_free_perag( xfs_perag_rele(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_active_ref) != 0); - call_rcu(&pag->rcu_head, __xfs_free_perag); + kfree_rcu_mightsleep(pag); } } @@ -347,9 +284,7 @@ xfs_free_unused_perag_range( xfs_agnumber_t index; for (index = agstart; index < agend; index++) { - spin_lock(&mp->m_perag_lock); - pag = radix_tree_delete(&mp->m_perag_tree, index); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, index); if (!pag) break; xfs_buf_cache_destroy(&pag->pag_bcache); @@ -390,20 +325,11 @@ xfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; - error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL); - if (error) - goto out_free_pag; - - spin_lock(&mp->m_perag_lock); - if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { - WARN_ON_ONCE(1); - spin_unlock(&mp->m_perag_lock); - radix_tree_preload_end(); - error = -EEXIST; + error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL); + if (error) { + WARN_ON_ONCE(error == -EBUSY); goto out_free_pag; } - spin_unlock(&mp->m_perag_lock); - radix_tree_preload_end(); #ifdef __KERNEL__ /* Place kernel structure only init below this point. */ @@ -451,9 +377,7 @@ xfs_initialize_perag( out_remove_pag: xfs_defer_drain_free(&pag->pag_intents_drain); - spin_lock(&mp->m_perag_lock); - radix_tree_delete(&mp->m_perag_tree, index); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, index); out_free_pag: kfree(pag); out_unwind_new_pags: diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 35de09a2516c..d9cccd093b60 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -63,9 +63,6 @@ struct xfs_perag { /* Blocks reserved for the reverse mapping btree. */ struct xfs_ag_resv pag_rmapbt_resv; - /* for rcu-safe freeing */ - struct rcu_head rcu_head; - /* Precalculated geometry info */ xfs_agblock_t block_count; xfs_agblock_t min_block; @@ -156,15 +153,11 @@ void xfs_free_perag(struct xfs_mount *mp); /* Passive AG references */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); -struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, - unsigned int tag); struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); /* Active AG references */ struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); -struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t, - int tag); void xfs_perag_rele(struct xfs_perag *pag); /* @@ -266,13 +259,6 @@ xfs_perag_next( (agno) = 0; \ for_each_perag_from((mp), (agno), (pag)) -#define for_each_perag_tag(mp, agno, pag, tag) \ - for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \ - (pag) != NULL; \ - (agno) = (pag)->pag_agno + 1, \ - xfs_perag_rele(pag), \ - (pag) = xfs_perag_grab_tag((mp), (agno), (tag))) - static inline struct xfs_perag * xfs_perag_next_wrap( struct xfs_perag *pag, diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 585e98e87ef9..aada676eee51 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -569,11 +569,11 @@ xfs_allocbt_block_maxrecs( /* * Calculate number of records in an alloc btree block. */ -int +unsigned int xfs_allocbt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); return xfs_allocbt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 155b47f231ab..12647f9aaa6d 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -53,7 +53,8 @@ struct xfs_btree_cur *xfs_bnobt_init_cursor(struct xfs_mount *mp, struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_perag *pag); -extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); +unsigned int xfs_allocbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index b9e98950eb3d..6aaec1246c95 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -686,7 +686,7 @@ xfs_attr_shortform_bytesfit( */ if (!dp->i_forkoff && dp->i_df.if_bytes > xfs_default_attroffset(dp)) - dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + dsize = xfs_bmdr_space_calc(MINDBTPTRS); break; case XFS_DINODE_FMT_BTREE: /* @@ -700,7 +700,7 @@ xfs_attr_shortform_bytesfit( return 0; return dp->i_forkoff; } - dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot); + dsize = xfs_bmap_bmdr_space(dp->i_df.if_broot); break; } @@ -708,11 +708,11 @@ xfs_attr_shortform_bytesfit( * A data fork btree root must have space for at least * MINDBTPTRS key/ptr pairs if the data fork is small or empty. */ - minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); + minforkoff = max_t(int64_t, dsize, xfs_bmdr_space_calc(MINDBTPTRS)); minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ if (offset >= maxforkoff) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7df74c35d9f9..8090e8249116 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -79,9 +79,9 @@ xfs_bmap_compute_maxlevels( maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp), whichfork); if (whichfork == XFS_DATA_FORK) - sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + sz = xfs_bmdr_space_calc(MINDBTPTRS); else - sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); + sz = xfs_bmdr_space_calc(MINABTPTRS); maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; @@ -102,8 +102,8 @@ xfs_bmap_compute_attr_offset( struct xfs_mount *mp) { if (mp->m_sb.sb_inodesize == 256) - return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); - return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); + return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS); + return xfs_bmdr_space_calc(6 * MINABTPTRS); } STATIC int /* error */ @@ -298,7 +298,7 @@ xfs_check_block( prevp = NULL; for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { dmxr = mp->m_bmap_dmxr[0]; - keyp = XFS_BMBT_KEY_ADDR(mp, block, i); + keyp = xfs_bmbt_key_addr(mp, block, i); if (prevp) { ASSERT(be64_to_cpu(prevp->br_startoff) < @@ -310,15 +310,15 @@ xfs_check_block( * Compare the block numbers to see if there are dups. */ if (root) - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz); else - pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr); for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { if (root) - thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz); else - thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr); if (*thispa == *pp) { xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld", __func__, j, i, @@ -373,7 +373,7 @@ xfs_bmap_check_leaf_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLFSBLOCK); @@ -406,7 +406,7 @@ xfs_bmap_check_leaf_extents( */ xfs_check_block(block, mp, 0, 0); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) { xfs_btree_mark_sick(cur); @@ -446,14 +446,14 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in this one. */ - ep = XFS_BMBT_REC_ADDR(mp, block, 1); + ep = xfs_bmbt_rec_addr(mp, block, 1); if (i) { ASSERT(xfs_bmbt_disk_get_startoff(&last) + xfs_bmbt_disk_get_blockcount(&last) <= xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { - nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + nextp = xfs_bmbt_rec_addr(mp, block, j + 1); ASSERT(xfs_bmbt_disk_get_startoff(ep) + xfs_bmbt_disk_get_blockcount(ep) <= xfs_bmbt_disk_get_startoff(nextp)); @@ -584,9 +584,9 @@ xfs_bmap_btree_to_extents( ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); #ifdef DEBUG if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) { @@ -714,7 +714,7 @@ xfs_bmap_extents_to_btree( for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue; - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt); + arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt); xfs_bmbt_disk_set_all(arp, &rec); cnt++; } @@ -724,10 +724,10 @@ xfs_bmap_extents_to_btree( /* * Fill in the root key and pointer. */ - kp = XFS_BMBT_KEY_ADDR(mp, block, 1); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + kp = xfs_bmbt_key_addr(mp, block, 1); + arp = xfs_bmbt_rec_addr(mp, ablock, 1); kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur, be16_to_cpu(block->bb_level))); *pp = cpu_to_be64(args.fsbno); @@ -896,7 +896,7 @@ xfs_bmap_add_attrfork_btree( mp = ip->i_mount; - if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip)) + if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip)) *flags |= XFS_ILOG_DBROOT; else { cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); @@ -1160,7 +1160,7 @@ xfs_iread_bmbt_block( } /* Copy records into the incore cache. */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); + frp = xfs_bmbt_rec_addr(mp, block, 1); for (j = 0; j < num_recs; j++, frp++, ir->loaded++) { struct xfs_bmbt_irec new; xfs_failaddr_t fa; @@ -3112,6 +3112,23 @@ xfs_bmap_extsize_align( return 0; } +static inline bool +xfs_bmap_adjacent_valid( + struct xfs_bmalloca *ap, + xfs_fsblock_t x, + xfs_fsblock_t y) +{ + struct xfs_mount *mp = ap->ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ap->ip) && + (ap->datatype & XFS_ALLOC_USERDATA)) + return x < mp->m_sb.sb_rblocks; + + return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks; +} + #define XFS_ALLOC_GAP_UNITS 4 /* returns true if ap->blkno was modified */ @@ -3119,36 +3136,25 @@ bool xfs_bmap_adjacent( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_mount_t *mp; /* mount point structure */ - int rt; /* true if inode is realtime */ - -#define ISVALID(x,y) \ - (rt ? \ - (x) < mp->m_sb.sb_rblocks : \ - XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ - XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) - - mp = ap->ip->i_mount; - rt = XFS_IS_REALTIME_INODE(ap->ip) && - (ap->datatype & XFS_ALLOC_USERDATA); + xfs_fsblock_t adjust; /* adjustment to block numbers */ + /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. */ if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && !isnullstartblock(ap->prev.br_startblock) && - ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, - ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, + ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; /* * Adjust for the gap between prevp and us. */ adjust = ap->offset - (ap->prev.br_startoff + ap->prev.br_blockcount); - if (adjust && - ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust, + ap->prev.br_startblock)) ap->blkno += adjust; return true; } @@ -3171,7 +3177,8 @@ xfs_bmap_adjacent( !isnullstartblock(ap->prev.br_startblock) && (prevbno = ap->prev.br_startblock + ap->prev.br_blockcount) && - ISVALID(prevbno, ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, prevbno, + ap->prev.br_startblock)) { /* * Calculate gap to end of previous block. */ @@ -3187,8 +3194,8 @@ xfs_bmap_adjacent( * number, then just use the end of the previous block. */ if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(prevbno + prevdiff, - ap->prev.br_startblock)) + xfs_bmap_adjacent_valid(ap, prevbno + prevdiff, + ap->prev.br_startblock)) prevbno += adjust; else prevdiff += adjust; @@ -3220,9 +3227,11 @@ xfs_bmap_adjacent( * offset by our length. */ if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(gotbno - gotdiff, gotbno)) + xfs_bmap_adjacent_valid(ap, gotbno - gotdiff, + gotbno)) gotbno -= adjust; - else if (ISVALID(gotbno - ap->length, gotbno)) { + else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length, + gotbno)) { gotbno -= ap->length; gotdiff += adjust - ap->length; } else @@ -3250,7 +3259,7 @@ xfs_bmap_adjacent( return true; } } -#undef ISVALID + return false; } @@ -4847,6 +4856,7 @@ xfs_bmapi_remap( } ip->i_nblocks += len; + ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (ifp->if_format == XFS_DINODE_FMT_BTREE) @@ -5376,7 +5386,8 @@ xfs_bmap_del_extent_real( */ if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; - xfs_rtbitmap_lock(tp, mp); + xfs_rtbitmap_lock(mp); + xfs_rtbitmap_trans_join(tp); } error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d1b06ccde19e..3464be771f95 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -65,10 +65,10 @@ xfs_bmdr_to_bmbt( ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMDR_KEY_ADDR(dblock, 1); - tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); - tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); + fkp = xfs_bmdr_key_addr(dblock, 1); + tkp = xfs_bmbt_key_addr(mp, rblock, 1); + fpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); + tpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -168,10 +168,10 @@ xfs_bmbt_to_bmdr( dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - tkp = XFS_BMDR_KEY_ADDR(dblock, 1); - fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); + fkp = xfs_bmbt_key_addr(mp, rblock, 1); + tkp = xfs_bmdr_key_addr(dblock, 1); + fpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); + tpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -645,13 +645,13 @@ xfs_bmbt_commit_staged_btree( /* * Calculate number of records in a bmap btree block. */ -int +unsigned int xfs_bmbt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { - blocklen -= XFS_BMBT_BLOCK_LEN(mp); + blocklen -= xfs_bmbt_block_len(mp); return xfs_bmbt_block_maxrecs(blocklen, leaf); } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index de1b73f1225c..49a3bae3f6ec 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -14,70 +14,6 @@ struct xfs_trans; struct xbtree_ifakeroot; /* - * Btree block header size depends on a superblock flag. - */ -#define XFS_BMBT_BLOCK_LEN(mp) \ - (xfs_has_crc(((mp))) ? \ - XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN) - -#define XFS_BMBT_REC_ADDR(mp, block, index) \ - ((xfs_bmbt_rec_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - ((index) - 1) * sizeof(xfs_bmbt_rec_t))) - -#define XFS_BMBT_KEY_ADDR(mp, block, index) \ - ((xfs_bmbt_key_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - ((index) - 1) * sizeof(xfs_bmbt_key_t))) - -#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ - ((xfs_bmbt_ptr_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - (maxrecs) * sizeof(xfs_bmbt_key_t) + \ - ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) - -#define XFS_BMDR_REC_ADDR(block, index) \ - ((xfs_bmdr_rec_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - ((index) - 1) * sizeof(xfs_bmdr_rec_t))) - -#define XFS_BMDR_KEY_ADDR(block, index) \ - ((xfs_bmdr_key_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - ((index) - 1) * sizeof(xfs_bmdr_key_t))) - -#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ - ((xfs_bmdr_ptr_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - (maxrecs) * sizeof(xfs_bmdr_key_t) + \ - ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) - -/* - * These are to be used when we know the size of the block and - * we don't have a cursor. - */ -#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ - XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) - -#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \ - (int)(XFS_BMBT_BLOCK_LEN(mp) + \ - ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) - -#define XFS_BMAP_BROOT_SPACE(mp, bb) \ - (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs))) -#define XFS_BMDR_SPACE_CALC(nrecs) \ - (int)(sizeof(xfs_bmdr_block_t) + \ - ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) -#define XFS_BMAP_BMDR_SPACE(bb) \ - (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) - -/* * Maximum number of bmap btree levels. */ #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) @@ -99,7 +35,8 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); extern int xfs_bmdr_maxrecs(int blocklen, int leaf); -extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); +unsigned int xfs_bmbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, xfs_ino_t new_owner, @@ -121,4 +58,144 @@ void xfs_bmbt_destroy_cur_cache(void); void xfs_bmbt_init_block(struct xfs_inode *ip, struct xfs_btree_block *buf, struct xfs_buf *bp, __u16 level, __u16 numrecs); +/* + * Btree block header size depends on a superblock flag. + */ +static inline size_t +xfs_bmbt_block_len(struct xfs_mount *mp) +{ + return xfs_has_crc(mp) ? + XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN; +} + +/* Addresses of key, pointers, and records within an incore bmbt block. */ + +static inline struct xfs_bmbt_rec * +xfs_bmbt_rec_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_rec *) + ((char *)block + xfs_bmbt_block_len(mp) + + (index - 1) * sizeof(struct xfs_bmbt_rec)); +} + +static inline struct xfs_bmbt_key * +xfs_bmbt_key_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_key *) + ((char *)block + xfs_bmbt_block_len(mp) + + (index - 1) * sizeof(struct xfs_bmbt_key *)); +} + +static inline xfs_bmbt_ptr_t * +xfs_bmbt_ptr_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index, + unsigned int maxrecs) +{ + return (xfs_bmbt_ptr_t *) + ((char *)block + xfs_bmbt_block_len(mp) + + maxrecs * sizeof(struct xfs_bmbt_key) + + (index - 1) * sizeof(xfs_bmbt_ptr_t)); +} + +/* Addresses of key, pointers, and records within an ondisk bmbt block. */ + +static inline struct xfs_bmbt_rec * +xfs_bmdr_rec_addr( + struct xfs_bmdr_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_rec *) + ((char *)(block + 1) + + (index - 1) * sizeof(struct xfs_bmbt_rec)); +} + +static inline struct xfs_bmbt_key * +xfs_bmdr_key_addr( + struct xfs_bmdr_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_key *) + ((char *)(block + 1) + + (index - 1) * sizeof(struct xfs_bmbt_key)); +} + +static inline xfs_bmbt_ptr_t * +xfs_bmdr_ptr_addr( + struct xfs_bmdr_block *block, + unsigned int index, + unsigned int maxrecs) +{ + return (xfs_bmbt_ptr_t *) + ((char *)(block + 1) + + maxrecs * sizeof(struct xfs_bmbt_key) + + (index - 1) * sizeof(xfs_bmbt_ptr_t)); +} + +/* + * Address of pointers within the incore btree root. + * + * These are to be used when we know the size of the block and + * we don't have a cursor. + */ +static inline xfs_bmbt_ptr_t * +xfs_bmap_broot_ptr_addr( + struct xfs_mount *mp, + struct xfs_btree_block *bb, + unsigned int i, + unsigned int sz) +{ + return xfs_bmbt_ptr_addr(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, false)); +} + +/* + * Compute the space required for the incore btree root containing the given + * number of records. + */ +static inline size_t +xfs_bmap_broot_space_calc( + struct xfs_mount *mp, + unsigned int nrecs) +{ + return xfs_bmbt_block_len(mp) + + (nrecs * (sizeof(struct xfs_bmbt_key) + sizeof(xfs_bmbt_ptr_t))); +} + +/* + * Compute the space required for the incore btree root given the ondisk + * btree root block. + */ +static inline size_t +xfs_bmap_broot_space( + struct xfs_mount *mp, + struct xfs_bmdr_block *bb) +{ + return xfs_bmap_broot_space_calc(mp, be16_to_cpu(bb->bb_numrecs)); +} + +/* Compute the space required for the ondisk root block. */ +static inline size_t +xfs_bmdr_space_calc(unsigned int nrecs) +{ + return sizeof(struct xfs_bmdr_block) + + (nrecs * (sizeof(struct xfs_bmbt_key) + sizeof(xfs_bmbt_ptr_t))); +} + +/* + * Compute the space required for the ondisk root block given an incore root + * block. + */ +static inline size_t +xfs_bmap_bmdr_space(struct xfs_btree_block *bb) +{ + return xfs_bmdr_space_calc(be16_to_cpu(bb->bb_numrecs)); +} + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 40021849b42f..2cd212ad2c1d 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -28,7 +28,6 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr.h" -#include "xfs_trans_priv.h" #include "xfs_exchmaps.h" static struct kmem_cache *xfs_defer_pending_cache; diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 454b63ef7201..860284064c5a 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -8,6 +8,7 @@ /* * SGI's XFS filesystem's major stuff (constants, structures) + * NOTE: This file must be compile-able with C++ compilers. */ /* @@ -826,6 +827,30 @@ struct xfs_exchange_range { }; /* + * Using the same definition of file2 as struct xfs_exchange_range, commit the + * contents of file1 into file2 if file2 has the same inode number, mtime, and + * ctime as the arguments provided to the call. The old contents of file2 will + * be moved to file1. + * + * Returns -EBUSY if there isn't an exact match for the file2 fields. + * + * Filesystems must be able to restart and complete the operation even after + * the system goes down. + */ +struct xfs_commit_range { + __s32 file1_fd; + __u32 pad; /* must be zeroes */ + __u64 file1_offset; /* file1 offset, bytes */ + __u64 file2_offset; /* file2 offset, bytes */ + __u64 length; /* bytes to exchange */ + + __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */ + + /* opaque file2 metadata for freshness checks */ + __u64 file2_freshness[6]; +}; + +/* * Exchange file data all the way to the ends of both files, and then exchange * the file sizes. This flag can be used to replace a file's contents with a * different amount of data. length will be ignored. @@ -906,13 +931,13 @@ static inline struct xfs_getparents_rec * xfs_getparents_next_rec(struct xfs_getparents *gp, struct xfs_getparents_rec *gpr) { - void *next = ((void *)gpr + gpr->gpr_reclen); + void *next = ((char *)gpr + gpr->gpr_reclen); void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize); if (next >= end) return NULL; - return next; + return (struct xfs_getparents_rec *)next; } /* Iterate through this file handle's directory parent pointers. */ @@ -997,6 +1022,8 @@ struct xfs_getparents_by_handle { #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) #define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range) +#define XFS_IOC_START_COMMIT _IOR ('X', 130, struct xfs_commit_range) +#define XFS_IOC_COMMIT_RANGE _IOW ('X', 131, struct xfs_commit_range) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0af5b7a33d05..20bb5ce38134 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1855,11 +1855,12 @@ out_release: int xfs_dialloc( struct xfs_trans **tpp, - xfs_ino_t parent, - umode_t mode, + const struct xfs_icreate_args *args, xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; + xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; + umode_t mode = args->mode & S_IFMT; xfs_agnumber_t agno; int error = 0; xfs_agnumber_t start_agno; @@ -2947,8 +2948,8 @@ xfs_ialloc_setup_geometry( /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, true); + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, false); igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index b549627e3a61..3a1323155a45 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -33,11 +33,13 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog); } +struct xfs_icreate_args; + /* * Allocate an inode on disk. Mode is used to tell whether the new inode will * need space, and whether it is a directory. */ -int xfs_dialloc(struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, +int xfs_dialloc(struct xfs_trans **tpp, const struct xfs_icreate_args *args, xfs_ino_t *new_ino); int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 797d5b5f7b72..401b42d52af6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -572,11 +572,11 @@ xfs_inobt_block_maxrecs( /* * Calculate number of records in an inobt btree block. */ -int +unsigned int xfs_inobt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { blocklen -= XFS_INOBT_BLOCK_LEN(mp); return xfs_inobt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 6472ec1ecbb4..300edf5bc009 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -50,7 +50,8 @@ struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp); struct xfs_btree_cur *xfs_finobt_init_cursor(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp); -extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); +unsigned int xfs_inobt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); /* ir_holemask to inode allocation bitmap conversion */ uint64_t xfs_inobt_irec_to_allocmask(const struct xfs_inobt_rec_incore *irec); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9d11ae015909..1158ca48626b 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -185,7 +185,7 @@ xfs_iformat_btree( ifp = xfs_ifork_ptr(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); - size = XFS_BMAP_BROOT_SPACE(mp, dfp); + size = xfs_bmap_broot_space(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); level = be16_to_cpu(dfp->bb_level); @@ -198,7 +198,7 @@ xfs_iformat_btree( */ if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) || nrecs == 0 || - XFS_BMDR_SPACE_CALC(nrecs) > + xfs_bmdr_space_calc(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || ifp->if_nextents > ip->i_nblocks) || level == 0 || level > XFS_BM_MAXLEVELS(mp, whichfork)) { @@ -409,7 +409,7 @@ xfs_iroot_realloc( * allocate it now and get out. */ if (ifp->if_broot_bytes == 0) { - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); + new_size = xfs_bmap_broot_space_calc(mp, rec_diff); ifp->if_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = (int)new_size; @@ -422,17 +422,17 @@ xfs_iroot_realloc( * location. The records don't change location because * they are kept butted up against the btree block header. */ - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false); new_max = cur_max + rec_diff; - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + new_size = xfs_bmap_broot_space_calc(mp, new_max); ifp->if_broot = krealloc(ifp->if_broot, new_size, GFP_KERNEL | __GFP_NOFAIL); - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + np = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, (int)new_size); ifp->if_broot_bytes = (int)new_size; - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t)); return; @@ -444,11 +444,11 @@ xfs_iroot_realloc( * records, just get rid of the root and clear the status bit. */ ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false); new_max = cur_max + rec_diff; ASSERT(new_max >= 0); if (new_max > 0) - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + new_size = xfs_bmap_broot_space_calc(mp, new_max); else new_size = 0; if (new_size > 0) { @@ -457,28 +457,28 @@ xfs_iroot_realloc( * First copy over the btree block header. */ memcpy(new_broot, ifp->if_broot, - XFS_BMBT_BLOCK_LEN(ip->i_mount)); + xfs_bmbt_block_len(ip->i_mount)); } else { new_broot = NULL; } /* - * Only copy the records and pointers if there are any. + * Only copy the keys and pointers if there are any. */ if (new_max > 0) { /* - * First copy the records. + * First copy the keys. */ - op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); - np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); - memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); + op = (char *)xfs_bmbt_key_addr(mp, ifp->if_broot, 1); + np = (char *)xfs_bmbt_key_addr(mp, new_broot, 1); + memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_key_t)); /* * Then copy the pointers. */ - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, + np = (char *)xfs_bmap_broot_ptr_addr(mp, new_broot, 1, (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t)); } @@ -486,7 +486,7 @@ xfs_iroot_realloc( ifp->if_broot = new_broot; ifp->if_broot_bytes = (int)new_size; if (ifp->if_broot) - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); return; } @@ -655,7 +655,7 @@ xfs_iflush_fork( if ((iip->ili_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 032333289113..cc38e1c3c3e1 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -308,7 +308,7 @@ xfs_inode_init( !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode))) inode->i_mode &= ~S_ISGID; - ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0; + ip->i_projid = xfs_get_initial_prid(pip); } ip->i_disk_size = 0; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index cb3b1d42ae9a..795928d1a66d 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -417,9 +417,10 @@ xfs_refcountbt_block_maxrecs( /* * Calculate the number of records in a refcount btree block. */ -int +unsigned int xfs_refcountbt_maxrecs( - int blocklen, + struct xfs_mount *mp, + unsigned int blocklen, bool leaf) { blocklen -= XFS_REFCOUNT_BLOCK_LEN; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 1e0ab25f6c68..beb93bef6a81 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -48,7 +48,8 @@ struct xbtree_afakeroot; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag); -extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); +unsigned int xfs_refcountbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 56fd6c4bd8b4..ac2f1f499b76 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -731,10 +731,11 @@ xfs_rmapbt_block_maxrecs( /* * Calculate number of records in an rmap btree block. */ -int +unsigned int xfs_rmapbt_maxrecs( - int blocklen, - int leaf) + struct xfs_mount *mp, + unsigned int blocklen, + bool leaf) { blocklen -= XFS_RMAP_BLOCK_LEN; return xfs_rmapbt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index eb90d89e8086..119b1567cd0e 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -47,7 +47,8 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_perag *pag); void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); -int xfs_rmapbt_maxrecs(int blocklen, int leaf); +unsigned int xfs_rmapbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 386b672c5058..27a4472402ba 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -13,6 +13,8 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_error.h" @@ -69,7 +71,7 @@ xfs_rtbuf_cache_relse( * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ -int +static int xfs_rtbuf_get( struct xfs_rtalloc_args *args, xfs_fileoff_t block, /* block number in bitmap or summary */ @@ -138,15 +140,43 @@ xfs_rtbuf_get( return 0; } +int +xfs_rtbitmap_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + struct xfs_mount *mp = args->mp; + + if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) { + xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP); + return -EFSCORRUPTED; + } + + return xfs_rtbuf_get(args, block, 0); +} + +int +xfs_rtsummary_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + struct xfs_mount *mp = args->mp; + + if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) { + xfs_rt_mark_sick(args->mp, XFS_SICK_RT_SUMMARY); + return -EFSCORRUPTED; + } + return xfs_rtbuf_get(args, block, 1); +} + /* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. + * Searching backward from start find the first block whose allocated/free state + * is different from start's. */ int xfs_rtfind_back( struct xfs_rtalloc_args *args, xfs_rtxnum_t start, /* starting rtext to look at */ - xfs_rtxnum_t limit, /* last rtext to look at */ xfs_rtxnum_t *rtx) /* out: start rtext found */ { struct xfs_mount *mp = args->mp; @@ -175,7 +205,7 @@ xfs_rtfind_back( */ word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); - len = start - limit + 1; + len = start + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. @@ -316,6 +346,8 @@ xfs_rtfind_forw( xfs_rtword_t incore; unsigned int word; /* word number in the buffer */ + ASSERT(start <= limit); + /* * Compute and read in starting bitmap block for starting block. */ @@ -698,7 +730,7 @@ xfs_rtfree_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(args, start, 0, &preblock); + error = xfs_rtfind_back(args, start, &preblock); if (error) { return error; } @@ -990,25 +1022,24 @@ xfs_rtfree_blocks( xfs_filblks_t rtlen) { struct xfs_mount *mp = tp->t_mountp; - xfs_rtxnum_t start; - xfs_filblks_t len; xfs_extlen_t mod; ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); - len = xfs_rtb_to_rtxrem(mp, rtlen, &mod); + mod = xfs_rtb_to_rtxoff(mp, rtlen); if (mod) { ASSERT(mod == 0); return -EIO; } - start = xfs_rtb_to_rtxrem(mp, rtbno, &mod); + mod = xfs_rtb_to_rtxoff(mp, rtbno); if (mod) { ASSERT(mod == 0); return -EIO; } - return xfs_rtfree_extent(tp, start, len); + return xfs_rtfree_extent(tp, xfs_rtb_to_rtx(mp, rtbno), + xfs_rtb_to_rtx(mp, rtlen)); } /* Find all the free records within a given range. */ @@ -1016,8 +1047,8 @@ int xfs_rtalloc_query_range( struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, + xfs_rtxnum_t start, + xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv) { @@ -1025,45 +1056,42 @@ xfs_rtalloc_query_range( .mp = mp, .tp = tp, }; - struct xfs_rtalloc_rec rec; - xfs_rtxnum_t rtstart; - xfs_rtxnum_t rtend; - xfs_rtxnum_t high_key; - int is_free; int error = 0; - if (low_rec->ar_startext > high_rec->ar_startext) + if (start > end) return -EINVAL; - if (low_rec->ar_startext >= mp->m_sb.sb_rextents || - low_rec->ar_startext == high_rec->ar_startext) + if (start == end || start >= mp->m_sb.sb_rextents) return 0; - high_key = min(high_rec->ar_startext, mp->m_sb.sb_rextents - 1); + end = min(end, mp->m_sb.sb_rextents - 1); /* Iterate the bitmap, looking for discrepancies. */ - rtstart = low_rec->ar_startext; - while (rtstart <= high_key) { + while (start <= end) { + struct xfs_rtalloc_rec rec; + int is_free; + xfs_rtxnum_t rtend; + /* Is the first block free? */ - error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend, + error = xfs_rtcheck_range(&args, start, 1, 1, &rtend, &is_free); if (error) break; /* How long does the extent go for? */ - error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend); + error = xfs_rtfind_forw(&args, start, end, &rtend); if (error) break; if (is_free) { - rec.ar_startext = rtstart; - rec.ar_extcount = rtend - rtstart + 1; + rec.ar_startext = start; + rec.ar_extcount = rtend - start + 1; error = fn(mp, tp, &rec, priv); if (error) break; } - rtstart = rtend + 1; + start = rtend + 1; } xfs_rtbuf_cache_relse(&args); @@ -1078,13 +1106,8 @@ xfs_rtalloc_query_all( xfs_rtalloc_query_range_fn fn, void *priv) { - struct xfs_rtalloc_rec keys[2]; - - keys[0].ar_startext = 0; - keys[1].ar_startext = mp->m_sb.sb_rextents - 1; - keys[0].ar_extcount = keys[1].ar_extcount = 0; - - return xfs_rtalloc_query_range(mp, tp, &keys[0], &keys[1], fn, priv); + return xfs_rtalloc_query_range(mp, tp, 0, mp->m_sb.sb_rextents - 1, fn, + priv); } /* Is the given extent all free? */ @@ -1125,21 +1148,6 @@ xfs_rtbitmap_blockcount( return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); } -/* - * Compute the number of rtbitmap words needed to populate every block of a - * bitmap that is large enough to track the given number of rt extents. - */ -unsigned long long -xfs_rtbitmap_wordcount( - struct xfs_mount *mp, - xfs_rtbxlen_t rtextents) -{ - xfs_filblks_t blocks; - - blocks = xfs_rtbitmap_blockcount(mp, rtextents); - return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; -} - /* Compute the number of rtsummary blocks needed to track the given rt space. */ xfs_filblks_t xfs_rtsummary_blockcount( @@ -1153,39 +1161,25 @@ xfs_rtsummary_blockcount( return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG); } -/* - * Compute the number of rtsummary info words needed to populate every block of - * a summary file that is large enough to track the given rt space. - */ -unsigned long long -xfs_rtsummary_wordcount( - struct xfs_mount *mp, - unsigned int rsumlevels, - xfs_extlen_t rbmblocks) +/* Lock both realtime free space metadata inodes for a freespace update. */ +void +xfs_rtbitmap_lock( + struct xfs_mount *mp) { - xfs_filblks_t blocks; - - blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks); - return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); } /* - * Lock both realtime free space metadata inodes for a freespace update. If a - * transaction is given, the inodes will be joined to the transaction and the + * Join both realtime free space metadata inodes to the transaction. The * ILOCKs will be released on transaction commit. */ void -xfs_rtbitmap_lock( - struct xfs_trans *tp, - struct xfs_mount *mp) +xfs_rtbitmap_trans_join( + struct xfs_trans *tp) { - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); - if (tp) - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); - - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); - if (tp) - xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tp->t_mountp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tp->t_mountp->m_rsumip, XFS_ILOCK_EXCL); } /* Unlock both realtime free space metadata inodes after a freespace update. */ @@ -1225,3 +1219,127 @@ xfs_rtbitmap_unlock_shared( if (rbmlock_flags & XFS_RBMLOCK_BITMAP) xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); } + +static int +xfs_rtfile_alloc_blocks( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + struct xfs_bmbt_irec *map) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int nmap = 1; + int error; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, + XFS_GROWFSRT_SPACE_RES(mp, count_fsb), 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error) + goto out_trans_cancel; + + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_METADATA, 0, map, &nmap); + if (error) + goto out_trans_cancel; + + return xfs_trans_commit(tp); + +out_trans_cancel: + xfs_trans_cancel(tp); + return error; +} + +/* Get a buffer for the block. */ +static int +xfs_rtfile_initialize_block( + struct xfs_inode *ip, + xfs_fsblock_t fsbno, + void *data) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + struct xfs_buf *bp; + const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; + enum xfs_blft buf_type; + int error; + + if (ip == mp->m_rsumip) + buf_type = XFS_BLFT_RTSUMMARY_BUF; + else + buf_type = XFS_BLFT_RTBITMAP_BUF; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, 0, 0, 0, &tp); + if (error) + return error; + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + + xfs_trans_buf_set_type(tp, bp, buf_type); + bp->b_ops = &xfs_rtbuf_ops; + if (data) + memcpy(bp->b_addr, data, copylen); + else + memset(bp->b_addr, 0, copylen); + xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + return xfs_trans_commit(tp); +} + +/* + * Allocate space to the bitmap or summary file, and zero it, for growfs. + * @data must be a contiguous buffer large enough to fill all blocks in the + * file; or NULL to initialize the contents to zeroes. + */ +int +xfs_rtfile_initialize_blocks( + struct xfs_inode *ip, /* inode (bitmap/summary) */ + xfs_fileoff_t offset_fsb, /* offset to start from */ + xfs_fileoff_t end_fsb, /* offset to allocate to */ + void *data) /* data to fill the blocks */ +{ + struct xfs_mount *mp = ip->i_mount; + const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; + + while (offset_fsb < end_fsb) { + struct xfs_bmbt_irec map; + xfs_filblks_t i; + int error; + + error = xfs_rtfile_alloc_blocks(ip, offset_fsb, + end_fsb - offset_fsb, &map); + if (error) + return error; + + /* + * Now we need to clear the allocated blocks. + * + * Do this one block per transaction, to keep it simple. + */ + for (i = 0; i < map.br_blockcount; i++) { + error = xfs_rtfile_initialize_block(ip, + map.br_startblock + i, data); + if (error) + return error; + if (data) + data += copylen; + } + + offset_fsb = map.br_startoff + map.br_blockcount; + } + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 6186585f2c37..140513d1d6bc 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -87,24 +87,6 @@ xfs_rtb_to_rtxoff( } /* - * Crack an rt block number into an rt extent number and an offset within that - * rt extent. Returns the rt extent number directly and the offset in @off. - */ -static inline xfs_rtxnum_t -xfs_rtb_to_rtxrem( - struct xfs_mount *mp, - xfs_rtblock_t rtbno, - xfs_extlen_t *off) -{ - if (likely(mp->m_rtxblklog >= 0)) { - *off = rtbno & mp->m_rtxblkmask; - return rtbno >> mp->m_rtxblklog; - } - - return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off); -} - -/* * Convert an rt block number into an rt extent number, rounding up to the next * rt extent if the rt block is not aligned to an rt extent boundary. */ @@ -293,30 +275,12 @@ typedef int (*xfs_rtalloc_query_range_fn)( #ifdef CONFIG_XFS_RT void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args); - -int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block, - int issum); - -static inline int -xfs_rtbitmap_read_buf( - struct xfs_rtalloc_args *args, - xfs_fileoff_t block) -{ - return xfs_rtbuf_get(args, block, 0); -} - -static inline int -xfs_rtsummary_read_buf( - struct xfs_rtalloc_args *args, - xfs_fileoff_t block) -{ - return xfs_rtbuf_get(args, block, 1); -} - +int xfs_rtbitmap_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block); +int xfs_rtsummary_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block); int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat); int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, - xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); + xfs_rtxnum_t *rtblock); int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, @@ -328,8 +292,7 @@ int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len); int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, + xfs_rtxnum_t start, xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv); int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, @@ -353,16 +316,15 @@ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); -unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, - xfs_rtbxlen_t rtextents); - xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, unsigned int rsumlevels, xfs_extlen_t rbmblocks); -unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, - unsigned int rsumlevels, xfs_extlen_t rbmblocks); -void xfs_rtbitmap_lock(struct xfs_trans *tp, struct xfs_mount *mp); +int xfs_rtfile_initialize_blocks(struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb, void *data); + +void xfs_rtbitmap_lock(struct xfs_mount *mp); void xfs_rtbitmap_unlock(struct xfs_mount *mp); +void xfs_rtbitmap_trans_join(struct xfs_trans *tp); /* Lock the rt bitmap inode in shared mode */ #define XFS_RBMLOCK_BITMAP (1U << 0) @@ -388,10 +350,9 @@ xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) /* shut up gcc */ return 0; } -# define xfs_rtbitmap_wordcount(mp, r) (0) # define xfs_rtsummary_blockcount(mp, l, b) (0) -# define xfs_rtsummary_wordcount(mp, l, b) (0) -# define xfs_rtbitmap_lock(tp, mp) do { } while (0) +# define xfs_rtbitmap_lock(mp) do { } while (0) +# define xfs_rtbitmap_trans_join(tp) do { } while (0) # define xfs_rtbitmap_unlock(mp) do { } while (0) # define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0) # define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6b56f0f6d4c1..d95409f3cba6 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -232,6 +232,38 @@ xfs_validate_sb_read( return 0; } +static uint64_t +xfs_sb_calc_rbmblocks( + struct xfs_sb *sbp) +{ + return howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); +} + +/* Validate the realtime geometry */ +bool +xfs_validate_rt_geometry( + struct xfs_sb *sbp) +{ + if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || + sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) + return false; + + if (sbp->sb_rblocks == 0) { + if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || + sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) + return false; + return true; + } + + if (sbp->sb_rextents == 0 || + sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) || + sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) || + sbp->sb_rbmblocks != xfs_sb_calc_rbmblocks(sbp)) + return false; + + return true; +} + /* Check all the superblock fields we care about when writing one out. */ STATIC int xfs_validate_sb_write( @@ -491,39 +523,13 @@ xfs_validate_sb_common( } } - /* Validate the realtime geometry; stolen from xfs_repair */ - if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || - sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { + if (!xfs_validate_rt_geometry(sbp)) { xfs_notice(mp, - "realtime extent sanity check failed"); + "realtime %sgeometry check failed", + sbp->sb_rblocks ? "" : "zeroed "); return -EFSCORRUPTED; } - if (sbp->sb_rblocks == 0) { - if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || - sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { - xfs_notice(mp, - "realtime zeroed geometry check failed"); - return -EFSCORRUPTED; - } - } else { - uint64_t rexts; - uint64_t rbmblocks; - - rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); - rbmblocks = howmany_64(sbp->sb_rextents, - NBBY * sbp->sb_blocksize); - - if (!xfs_validate_rtextents(rexts) || - sbp->sb_rextents != rexts || - sbp->sb_rextslog != xfs_compute_rextslog(rexts) || - sbp->sb_rbmblocks != rbmblocks) { - xfs_notice(mp, - "realtime geometry sanity check failed"); - return -EFSCORRUPTED; - } - } - /* * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) * would imply the image is corrupted. @@ -959,6 +965,15 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .verify_write = xfs_sb_write_verify, }; +void +xfs_mount_sb_set_rextsize( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); +} + /* * xfs_mount_common * @@ -983,26 +998,25 @@ xfs_sb_mount_common( mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; - mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); - mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); + xfs_mount_sb_set_rextsize(mp, sbp); - mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; - mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); - mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); + mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; - mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); - mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 37b1ed1bc209..885c83755991 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -17,6 +17,8 @@ extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern int xfs_sync_sb_buf(struct xfs_mount *mp); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); +void xfs_mount_sb_set_rextsize(struct xfs_mount *mp, + struct xfs_sb *sbp); extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); @@ -38,6 +40,7 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, bool silent); +bool xfs_validate_rt_geometry(struct xfs_sb *sbp); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 45aaf169806a..1a7f95bcf069 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -130,7 +130,7 @@ xfs_calc_inode_res( (4 * sizeof(struct xlog_op_header) + sizeof(struct xfs_inode_log_format) + mp->m_sb.sb_inodesize + - 2 * XFS_BMBT_BLOCK_LEN(mp)); + 2 * xfs_bmbt_block_len(mp)); } /* @@ -918,7 +918,7 @@ xfs_calc_growrtfree_reservation( return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + - xfs_calc_buf_res(1, mp->m_rsumsize); + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks)); } /* diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 76eb9e328835..a8cd44d03ef6 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -235,16 +235,4 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); -/* Do we support an rt volume having this number of rtextents? */ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 1e656fab5e41..49dc38acc66b 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -480,7 +480,7 @@ xrep_bmap_iroot_size( { ASSERT(level > 0); - return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level); + return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level); } /* Update the inode counters. */ diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 3d5f1f6b4b7b..47148cc4a833 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -53,6 +53,11 @@ int xchk_checkpoint_log(struct xfs_mount *mp); bool xchk_should_check_xref(struct xfs_scrub *sc, int *error, struct xfs_btree_cur **curpp); +static inline int xchk_setup_nothing(struct xfs_scrub *sc) +{ + return -ENOENT; +} + /* Setup functions */ int xchk_setup_agheader(struct xfs_scrub *sc); int xchk_setup_fs(struct xfs_scrub *sc); @@ -72,16 +77,8 @@ int xchk_setup_dirtree(struct xfs_scrub *sc); int xchk_setup_rtbitmap(struct xfs_scrub *sc); int xchk_setup_rtsummary(struct xfs_scrub *sc); #else -static inline int -xchk_setup_rtbitmap(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_setup_rtsummary(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_setup_rtbitmap xchk_setup_nothing +# define xchk_setup_rtsummary xchk_setup_nothing #endif #ifdef CONFIG_XFS_QUOTA int xchk_ino_dqattach(struct xfs_scrub *sc); @@ -93,16 +90,8 @@ xchk_ino_dqattach(struct xfs_scrub *sc) { return 0; } -static inline int -xchk_setup_quota(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_setup_quotacheck(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_setup_quota xchk_setup_nothing +# define xchk_setup_quotacheck xchk_setup_nothing #endif int xchk_setup_fscounters(struct xfs_scrub *sc); int xchk_setup_nlinks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index daf9f1ee7c2c..3e45b9b72312 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -846,7 +846,7 @@ xrep_dinode_bad_bmbt_fork( nrecs = be16_to_cpu(dfp->bb_numrecs); level = be16_to_cpu(dfp->bb_level); - if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size) + if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size) return true; if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork)) return true; @@ -858,12 +858,12 @@ xrep_dinode_bad_bmbt_fork( xfs_fileoff_t fileoff; xfs_fsblock_t fsbno; - fkp = XFS_BMDR_KEY_ADDR(dfp, i); + fkp = xfs_bmdr_key_addr(dfp, i); fileoff = be64_to_cpu(fkp->br_startoff); if (!xfs_verify_fileoff(sc->mp, fileoff)) return true; - fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr); + fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr); fsbno = be64_to_cpu(*fpp); if (!xfs_verify_fsbno(sc->mp, fsbno)) return true; @@ -1121,7 +1121,7 @@ xrep_dinode_ensure_forkoff( struct xfs_bmdr_block *bmdr; struct xfs_scrub *sc = ri->sc; xfs_extnum_t attr_extents, data_extents; - size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1); + size_t bmdr_minsz = xfs_bmdr_space_calc(1); unsigned int lit_sz = XFS_LITINO(sc->mp); unsigned int afork_min, dfork_min; @@ -1173,7 +1173,7 @@ xrep_dinode_ensure_forkoff( case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */ bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); - afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + afork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; default: /* We should never see any other formats. */ @@ -1223,7 +1223,7 @@ xrep_dinode_ensure_forkoff( case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */ bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); - dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; default: dfork_min = 0; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 3fee603f5244..7c7366c98338 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -63,7 +63,8 @@ xchk_setup_rtsummary( * us to avoid pinning kernel memory for this purpose. */ descr = xchk_xfile_descr(sc, "realtime summary file"); - error = xfile_create(descr, mp->m_rsumsize, &sc->xfile); + error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks), + &sc->xfile); kfree(descr); if (error) return error; @@ -95,16 +96,14 @@ xchk_setup_rtsummary( * volume. Hence it is safe to compute and check the geometry values. */ if (mp->m_sb.sb_rblocks) { - xfs_filblks_t rsumblocks; int rextslog; rts->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks); rextslog = xfs_compute_rextslog(rts->rextents); rts->rsumlevels = rextslog + 1; rts->rbmblocks = xfs_rtbitmap_blockcount(mp, rts->rextents); - rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels, + rts->rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels, rts->rbmblocks); - rts->rsumsize = XFS_FSB_TO_B(mp, rsumblocks); } return 0; } @@ -316,7 +315,7 @@ xchk_rtsummary( } /* Is m_rsumsize correct? */ - if (mp->m_rsumsize != rts->rsumsize) { + if (mp->m_rsumblocks != rts->rsumblocks) { xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino); goto out_rbm; } @@ -332,7 +331,7 @@ xchk_rtsummary( * growfsrt expands the summary file before updating sb_rextents, so * the file can be larger than rsumsize. */ - if (mp->m_rsumip->i_disk_size < rts->rsumsize) { + if (mp->m_rsumip->i_disk_size < XFS_FSB_TO_B(mp, rts->rsumblocks)) { xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino); goto out_rbm; } diff --git a/fs/xfs/scrub/rtsummary.h b/fs/xfs/scrub/rtsummary.h index e1d50304d8d4..e44b04cb6e2d 100644 --- a/fs/xfs/scrub/rtsummary.h +++ b/fs/xfs/scrub/rtsummary.h @@ -14,7 +14,7 @@ struct xchk_rtsummary { uint64_t rextents; uint64_t rbmblocks; - uint64_t rsumsize; + xfs_filblks_t rsumblocks; unsigned int rsumlevels; unsigned int resblks; diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c index d9e971c4c79f..7deeb948cb70 100644 --- a/fs/xfs/scrub/rtsummary_repair.c +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -56,7 +56,7 @@ xrep_setup_rtsummary( * transaction (which we cannot drop because we cannot drop the * rtsummary ILOCK) and cannot ask for more reservation. */ - blocks = XFS_B_TO_FSB(mp, mp->m_rsumsize); + blocks = mp->m_rsumblocks; blocks += xfs_bmbt_calc_size(mp, blocks) * 2; if (blocks > UINT_MAX) return -EOPNOTSUPP; @@ -100,7 +100,6 @@ xrep_rtsummary( { struct xchk_rtsummary *rts = sc->buf; struct xfs_mount *mp = sc->mp; - xfs_filblks_t rsumblocks; int error; /* We require the rmapbt to rebuild anything. */ @@ -131,10 +130,9 @@ xrep_rtsummary( } /* Make sure we have space allocated for the entire summary file. */ - rsumblocks = XFS_B_TO_FSB(mp, rts->rsumsize); xfs_trans_ijoin(sc->tp, sc->ip, 0); xfs_trans_ijoin(sc->tp, sc->tempip, 0); - error = xrep_tempfile_prealloc(sc, 0, rsumblocks); + error = xrep_tempfile_prealloc(sc, 0, rts->rsumblocks); if (error) return error; @@ -143,11 +141,11 @@ xrep_rtsummary( return error; /* Copy the rtsummary file that we generated. */ - error = xrep_tempfile_copyin(sc, 0, rsumblocks, + error = xrep_tempfile_copyin(sc, 0, rts->rsumblocks, xrep_rtsummary_prep_buf, rts); if (error) return error; - error = xrep_tempfile_set_isize(sc, rts->rsumsize); + error = xrep_tempfile_set_isize(sc, XFS_FSB_TO_B(mp, rts->rsumblocks)); if (error) return error; @@ -168,7 +166,7 @@ xrep_rtsummary( memset(mp->m_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks); mp->m_rsumlevels = rts->rsumlevels; - mp->m_rsumsize = rts->rsumsize; + mp->m_rsumblocks = rts->rsumblocks; /* Free the old rtsummary blocks if they're not in use. */ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 1bc33f010d0e..5993fcaffb2c 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -231,6 +231,11 @@ xchk_should_terminate( return false; } +static inline int xchk_nothing(struct xfs_scrub *sc) +{ + return -ENOENT; +} + /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); int xchk_superblock(struct xfs_scrub *sc); @@ -254,31 +259,15 @@ int xchk_dirtree(struct xfs_scrub *sc); int xchk_rtbitmap(struct xfs_scrub *sc); int xchk_rtsummary(struct xfs_scrub *sc); #else -static inline int -xchk_rtbitmap(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_rtsummary(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_rtbitmap xchk_nothing +# define xchk_rtsummary xchk_nothing #endif #ifdef CONFIG_XFS_QUOTA int xchk_quota(struct xfs_scrub *sc); int xchk_quotacheck(struct xfs_scrub *sc); #else -static inline int -xchk_quota(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_quotacheck(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_quota xchk_nothing +# define xchk_quotacheck xchk_nothing #endif int xchk_fscounters(struct xfs_scrub *sc); int xchk_nlinks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index d390d56cd875..177f922acfaf 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -88,7 +88,7 @@ xrep_tempfile_create( goto out_release_dquots; /* Allocate inode, set up directory. */ - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (error) goto out_trans_cancel; error = xfs_icreate(tp, ino, &args, &sc->tempip); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e224b49b7cff..35a8c1b8b3cb 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -346,6 +346,17 @@ xfs_bmap_defer_add( trace_xfs_bmap_defer(bi); xfs_bmap_update_get_group(tp->t_mountp, bi); + + /* + * Ensure the deferred mapping is pre-recorded in i_delayed_blks. + * + * Otherwise stat can report zero blocks for an inode that actually has + * data when the entire mapping is in the process of being overwritten + * using the out of place write path. This is undone in xfs_bmapi_remap + * after it has incremented di_nblocks for a successful operation. + */ + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount; xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type); } @@ -367,6 +378,9 @@ xfs_bmap_update_cancel_item( { struct xfs_bmap_intent *bi = bi_entry(item); + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks -= bi->bi_bmap.br_blockcount; + xfs_bmap_update_put_group(bi); kmem_cache_free(xfs_bmap_intent_cache, bi); } @@ -464,6 +478,9 @@ xfs_bui_recover_work( bi->bi_owner = *ipp; xfs_bmap_update_get_group(mp, bi); + /* see xfs_bmap_defer_add for details */ + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount; xfs_defer_add_item(dfp, &bi->bi_list); return bi; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e9fdebaa40ea..053d567c9108 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -331,8 +331,7 @@ xfs_getbmap( } if (xfs_get_extsz_hint(ip) || - (ip->i_diflags & - (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) + (ip->i_diflags & XFS_DIFLAG_PREALLOC)) max_len = mp->m_super->s_maxbytes; else max_len = XFS_ISIZE(ip); @@ -492,12 +491,12 @@ bool xfs_can_free_eofblocks( struct xfs_inode *ip) { - struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; + bool found_blocks = false; xfs_fileoff_t end_fsb; xfs_fileoff_t last_fsb; - int nimaps = 1; - int error; + struct xfs_bmbt_irec imap; + struct xfs_iext_cursor icur; /* * Caller must either hold the exclusive io lock; or be inactivating @@ -524,12 +523,11 @@ xfs_can_free_eofblocks( return false; /* - * Only free real extents for inodes with persistent preallocations or - * the append-only flag. + * Do not free real extents in preallocated files unless the file has + * delalloc blocks and we are forced to remove them. */ - if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) - if (ip->i_delayed_blks == 0) - return false; + if ((ip->i_diflags & XFS_DIFLAG_PREALLOC) && !ip->i_delayed_blks) + return false; /* * Do not try to free post-EOF blocks if EOF is beyond the end of the @@ -544,21 +542,13 @@ xfs_can_free_eofblocks( return false; /* - * Look up the mapping for the first block past EOF. If we can't find - * it, there's nothing to free. + * Check if there is an post-EOF extent to free. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps, - 0); + if (xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap)) + found_blocks = true; xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (error || nimaps == 0) - return false; - - /* - * If there's a real mapping there or there are delayed allocation - * reservations, then we have post-EOF blocks to try to free. - */ - return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks; + return found_blocks; } /* @@ -1195,7 +1185,7 @@ xfs_swap_extents_check_format( */ if (tifp->if_format == XFS_DINODE_FMT_BTREE) { if (xfs_inode_has_attr_fork(ip) && - XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip)) + xfs_bmap_bmdr_space(tifp->if_broot) > xfs_inode_fork_boff(ip)) return -EINVAL; if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) return -EINVAL; @@ -1204,7 +1194,7 @@ xfs_swap_extents_check_format( /* Reciprocal target->temp btree format checks */ if (ifp->if_format == XFS_DINODE_FMT_BTREE) { if (xfs_inode_has_attr_fork(tip) && - XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip)) + xfs_bmap_bmdr_space(ip->i_df.if_broot) > xfs_inode_fork_boff(tip)) return -EINVAL; if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) return -EINVAL; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 25f5dffeab2a..d8c4a5dcca7a 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -554,11 +554,10 @@ xfs_trim_rtdev_extents( xfs_daddr_t end, xfs_daddr_t minlen) { - struct xfs_rtalloc_rec low = { }; - struct xfs_rtalloc_rec high = { }; struct xfs_trim_rtdev tr = { .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), }; + xfs_rtxnum_t low, high; struct xfs_trans *tp; xfs_daddr_t rtdev_daddr; int error; @@ -584,17 +583,17 @@ xfs_trim_rtdev_extents( XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1); /* Convert the rt blocks to rt extents */ - low.ar_startext = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start)); - high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end)); + low = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start)); + high = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end)); /* * Walk the free ranges between low and high. The query_range function * trims the extents returned. */ do { - tr.stop_rtx = low.ar_startext + (mp->m_sb.sb_blocksize * NBBY); + tr.stop_rtx = low + (mp->m_sb.sb_blocksize * NBBY); xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); - error = xfs_rtalloc_query_range(mp, tp, &low, &high, + error = xfs_rtalloc_query_range(mp, tp, low, high, xfs_trim_gather_rtextent, &tr); if (error == -ECANCELED) @@ -615,8 +614,8 @@ xfs_trim_rtdev_extents( if (error) break; - low.ar_startext = tr.restart_rtx; - } while (!xfs_trim_should_stop() && low.ar_startext <= high.ar_startext); + low = tr.restart_rtx; + } while (!xfs_trim_should_stop() && low <= high); xfs_trans_cancel(tp); return error; @@ -708,7 +707,7 @@ xfs_ioc_trim( return last_error; range.len = min_t(unsigned long long, range.len, - XFS_FSB_TO_B(mp, max_blocks)); + XFS_FSB_TO_B(mp, max_blocks) - range.start); if (copy_to_user(urange, &range, sizeof(range))) return -EFAULT; return 0; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index c8a655c92c92..d0889190ab7f 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -72,6 +72,34 @@ xfs_exchrange_estimate( return error; } +/* + * Check that file2's metadata agree with the snapshot that we took for the + * range commit request. + * + * This should be called after the filesystem has locked /all/ inode metadata + * against modification. + */ +STATIC int +xfs_exchrange_check_freshness( + const struct xfs_exchrange *fxr, + struct xfs_inode *ip2) +{ + struct inode *inode2 = VFS_I(ip2); + struct timespec64 ctime = inode_get_ctime(inode2); + struct timespec64 mtime = inode_get_mtime(inode2); + + trace_xfs_exchrange_freshness(fxr, ip2); + + /* Check that file2 hasn't otherwise been modified. */ + if (fxr->file2_ino != ip2->i_ino || + fxr->file2_gen != inode2->i_generation || + !timespec64_equal(&fxr->file2_ctime, &ctime) || + !timespec64_equal(&fxr->file2_mtime, &mtime)) + return -EBUSY; + + return 0; +} + #define QRETRY_IP1 (0x1) #define QRETRY_IP2 (0x2) @@ -607,6 +635,12 @@ xfs_exchrange_prep( if (error || fxr->length == 0) return error; + if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) { + error = xfs_exchrange_check_freshness(fxr, ip2); + if (error) + return error; + } + /* Attach dquots to both inodes before changing block maps. */ error = xfs_qm_dqattach(ip2); if (error) @@ -719,7 +753,8 @@ xfs_exchange_range( if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) return -EXDEV; - if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS | + __XFS_EXCHANGE_RANGE_CHECK_FRESH2)) return -EINVAL; /* Userspace requests only honored for regular files. */ @@ -802,3 +837,109 @@ xfs_ioc_exchange_range( fdput(file1); return error; } + +/* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */ +struct xfs_commit_range_fresh { + xfs_fsid_t fsid; /* m_fixedfsid */ + __u64 file2_ino; /* inode number */ + __s64 file2_mtime; /* modification time */ + __s64 file2_ctime; /* change time */ + __s32 file2_mtime_nsec; /* mod time, nsec */ + __s32 file2_ctime_nsec; /* change time, nsec */ + __u32 file2_gen; /* inode generation */ + __u32 magic; /* zero */ +}; +#define XCR_FRESH_MAGIC 0x444F524B /* DORK */ + +/* Set up a commitrange operation by sampling file2's write-related attrs */ +long +xfs_ioc_start_commit( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_commit_range args = { }; + struct timespec64 ts; + struct xfs_commit_range_fresh *kern_f; + struct xfs_commit_range_fresh __user *user_f; + struct inode *inode2 = file_inode(file); + struct xfs_inode *ip2 = XFS_I(inode2); + const unsigned int lockflags = XFS_IOLOCK_SHARED | + XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED; + + BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) != + sizeof(args.file2_freshness)); + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + xfs_ilock(ip2, lockflags); + ts = inode_get_ctime(inode2); + kern_f->file2_ctime = ts.tv_sec; + kern_f->file2_ctime_nsec = ts.tv_nsec; + ts = inode_get_mtime(inode2); + kern_f->file2_mtime = ts.tv_sec; + kern_f->file2_mtime_nsec = ts.tv_nsec; + kern_f->file2_ino = ip2->i_ino; + kern_f->file2_gen = inode2->i_generation; + kern_f->magic = XCR_FRESH_MAGIC; + xfs_iunlock(ip2, lockflags); + + user_f = (struct xfs_commit_range_fresh __user *)&argp->file2_freshness; + if (copy_to_user(user_f, kern_f, sizeof(*kern_f))) + return -EFAULT; + + return 0; +} + +/* + * Exchange file1 and file2 contents if file2 has not been written since the + * start commit operation. + */ +long +xfs_ioc_commit_range( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_exchrange fxr = { + .file2 = file, + }; + struct xfs_commit_range args; + struct xfs_commit_range_fresh *kern_f; + struct xfs_inode *ip2 = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip2->i_mount; + struct fd file1; + int error; + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + if (kern_f->magic != XCR_FRESH_MAGIC) + return -EBUSY; + if (memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t))) + return -EBUSY; + + fxr.file1_offset = args.file1_offset; + fxr.file2_offset = args.file2_offset; + fxr.length = args.length; + fxr.flags = args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2; + fxr.file2_ino = kern_f->file2_ino; + fxr.file2_gen = kern_f->file2_gen; + fxr.file2_mtime.tv_sec = kern_f->file2_mtime; + fxr.file2_mtime.tv_nsec = kern_f->file2_mtime_nsec; + fxr.file2_ctime.tv_sec = kern_f->file2_ctime; + fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec; + + file1 = fdget(args.file1_fd); + if (!file1.file) + return -EBADF; + fxr.file1 = file1.file; + + error = xfs_exchange_range(&fxr); + fdput(file1); + return error; +} diff --git a/fs/xfs/xfs_exchrange.h b/fs/xfs/xfs_exchrange.h index 039abcca546e..bc1298aba806 100644 --- a/fs/xfs/xfs_exchrange.h +++ b/fs/xfs/xfs_exchrange.h @@ -10,8 +10,12 @@ #define __XFS_EXCHANGE_RANGE_UPD_CMTIME1 (1ULL << 63) #define __XFS_EXCHANGE_RANGE_UPD_CMTIME2 (1ULL << 62) +/* Freshness check required */ +#define __XFS_EXCHANGE_RANGE_CHECK_FRESH2 (1ULL << 61) + #define XFS_EXCHANGE_RANGE_PRIV_FLAGS (__XFS_EXCHANGE_RANGE_UPD_CMTIME1 | \ - __XFS_EXCHANGE_RANGE_UPD_CMTIME2) + __XFS_EXCHANGE_RANGE_UPD_CMTIME2 | \ + __XFS_EXCHANGE_RANGE_CHECK_FRESH2) struct xfs_exchrange { struct file *file1; @@ -22,10 +26,20 @@ struct xfs_exchrange { u64 length; u64 flags; /* XFS_EXCHANGE_RANGE flags */ + + /* file2 metadata for freshness checks */ + u64 file2_ino; + struct timespec64 file2_mtime; + struct timespec64 file2_ctime; + u32 file2_gen; }; long xfs_ioc_exchange_range(struct file *file, struct xfs_exchange_range __user *argp); +long xfs_ioc_start_commit(struct file *file, + struct xfs_commit_range __user *argp); +long xfs_ioc_commit_range(struct file *file, + struct xfs_commit_range __user *argp); struct xfs_exchmaps_req; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f6e4912769a0..e97d789495a5 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1238,12 +1238,78 @@ xfs_dir_open( return error; } +/* + * Don't bother propagating errors. We're just doing cleanup, and the caller + * ignores the return value anyway. + */ STATIC int xfs_file_release( - struct inode *inode, - struct file *filp) + struct inode *inode, + struct file *file) { - return xfs_release(XFS_I(inode)); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + /* + * If this is a read-only mount or the file system has been shut down, + * don't generate I/O. + */ + if (xfs_is_readonly(mp) || xfs_is_shutdown(mp)) + return 0; + + /* + * If we previously truncated this file and removed old data in the + * process, we want to initiate "early" writeout on the last close. + * This is an attempt to combat the notorious NULL files problem which + * is particularly noticeable from a truncate down, buffered (re-)write + * (delalloc), followed by a crash. What we are effectively doing here + * is significantly reducing the time window where we'd otherwise be + * exposed to that problem. + */ + if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) { + xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED); + if (ip->i_delayed_blks > 0) + filemap_flush(inode->i_mapping); + } + + /* + * XFS aggressively preallocates post-EOF space to generate contiguous + * allocations for writers that append to the end of the file. + * + * To support workloads that close and reopen the file frequently, these + * preallocations usually persist after a close unless it is the first + * close for the inode. This is a tradeoff to generate tightly packed + * data layouts for unpacking tarballs or similar archives that write + * one file after another without going back to it while keeping the + * preallocation for files that have recurring open/write/close cycles. + * + * This heuristic is skipped for inodes with the append-only flag as + * that flag is rather pointless for inodes written only once. + * + * There is no point in freeing blocks here for open but unlinked files + * as they will be taken care of by the inactivation path soon. + * + * When releasing a read-only context, don't flush data or trim post-EOF + * blocks. This avoids open/read/close workloads from removing EOF + * blocks that other writers depend upon to reduce fragmentation. + * + * If we can't get the iolock just skip truncating the blocks past EOF + * because we could deadlock with the mmap_lock otherwise. We'll get + * another chance to drop them once the last reference to the inode is + * dropped, so we'll never leak blocks permanently. + */ + if (inode->i_nlink && + (file->f_mode & FMODE_WRITE) && + !(ip->i_diflags & XFS_DIFLAG_APPEND) && + !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED) && + xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (xfs_can_free_eofblocks(ip) && + !xfs_iflags_test_and_set(ip, XFS_EOFBLOCKS_RELEASED)) + xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } + + return 0; } STATIC int diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 71f32354944e..ae18ab86e608 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -44,7 +44,7 @@ xfs_fsmap_from_internal( } /* Convert an fsmap to an xfs_fsmap. */ -void +static void xfs_fsmap_to_internal( struct xfs_fsmap *dest, struct fsmap *src) @@ -441,141 +441,6 @@ xfs_getfsmap_set_irec_flags( irec->rm_flags |= XFS_RMAP_UNWRITTEN; } -/* Execute a getfsmap query against the log device. */ -STATIC int -xfs_getfsmap_logdev( - struct xfs_trans *tp, - const struct xfs_fsmap *keys, - struct xfs_getfsmap_info *info) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_rmap_irec rmap; - xfs_daddr_t rec_daddr, len_daddr; - xfs_fsblock_t start_fsb, end_fsb; - uint64_t eofs; - - eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); - if (keys[0].fmr_physical >= eofs) - return 0; - start_fsb = XFS_BB_TO_FSBT(mp, - keys[0].fmr_physical + keys[0].fmr_length); - end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); - - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) - info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); - - trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); - trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); - - if (start_fsb > 0) - return 0; - - /* Fabricate an rmap entry for the external log device. */ - rmap.rm_startblock = 0; - rmap.rm_blockcount = mp->m_sb.sb_logblocks; - rmap.rm_owner = XFS_RMAP_OWN_LOG; - rmap.rm_offset = 0; - rmap.rm_flags = 0; - - rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); - len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); - return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); -} - -#ifdef CONFIG_XFS_RT -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_mount *mp, - struct xfs_trans *tp, - const struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_rtblock_t rtbno; - xfs_daddr_t rec_daddr, len_daddr; - - rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); - rec_daddr = XFS_FSB_TO_BB(mp, rtbno); - irec.rm_startblock = rtbno; - - rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); - len_daddr = XFS_FSB_TO_BB(mp, rtbno); - irec.rm_blockcount = rtbno; - - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); -} - -/* Execute a getfsmap query against the realtime device rtbitmap. */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap( - struct xfs_trans *tp, - const struct xfs_fsmap *keys, - struct xfs_getfsmap_info *info) -{ - - struct xfs_rtalloc_rec alow = { 0 }; - struct xfs_rtalloc_rec ahigh = { 0 }; - struct xfs_mount *mp = tp->t_mountp; - xfs_rtblock_t start_rtb; - xfs_rtblock_t end_rtb; - uint64_t eofs; - int error; - - eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); - if (keys[0].fmr_physical >= eofs) - return 0; - start_rtb = XFS_BB_TO_FSBT(mp, - keys[0].fmr_physical + keys[0].fmr_length); - end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); - - info->missing_owner = XFS_FMR_OWN_UNKNOWN; - - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) { - info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); - if (info->low_daddr >= eofs) - return 0; - } - - trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); - trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); - - xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); - - /* - * Set up query parameters to return free rtextents covering the range - * we want. - */ - alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb); - ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb); - error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh, - xfs_getfsmap_rtdev_rtbitmap_helper, info); - if (error) - goto err; - - /* - * Report any gaps at the end of the rtbitmap by simulating a null - * rmap starting at the block after the end of the query range. - */ - info->last = true; - ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext); - - error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info); - if (error) - goto err; -err: - xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); - return error; -} -#endif /* CONFIG_XFS_RT */ - static inline bool rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) { @@ -800,6 +665,140 @@ xfs_getfsmap_datadev_bnobt( xfs_getfsmap_datadev_bnobt_query, &akeys[0]); } +/* Execute a getfsmap query against the log device. */ +STATIC int +xfs_getfsmap_logdev( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; + xfs_daddr_t rec_daddr, len_daddr; + xfs_fsblock_t start_fsb, end_fsb; + uint64_t eofs; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + start_fsb = XFS_BB_TO_FSBT(mp, + keys[0].fmr_physical + keys[0].fmr_length); + end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fmr_length > 0) + info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); + + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); + + if (start_fsb > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. */ + rmap.rm_startblock = 0; + rmap.rm_blockcount = mp->m_sb.sb_logblocks; + rmap.rm_owner = XFS_RMAP_OWN_LOG; + rmap.rm_offset = 0; + rmap.rm_flags = 0; + + rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); + len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); + return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); +} + +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_rtblock_t rtbno; + xfs_daddr_t rec_daddr, len_daddr; + + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); + rec_daddr = XFS_FSB_TO_BB(mp, rtbno); + irec.rm_startblock = rtbno; + + rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); + len_daddr = XFS_FSB_TO_BB(mp, rtbno); + irec.rm_blockcount = rtbno; + + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); +} + +/* Execute a getfsmap query against the realtime device rtbitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + + struct xfs_rtalloc_rec ahigh = { 0 }; + struct xfs_mount *mp = tp->t_mountp; + xfs_rtblock_t start_rtb; + xfs_rtblock_t end_rtb; + xfs_rtxnum_t high; + uint64_t eofs; + int error; + + eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); + if (keys[0].fmr_physical >= eofs) + return 0; + start_rtb = XFS_BB_TO_FSBT(mp, + keys[0].fmr_physical + keys[0].fmr_length); + end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fmr_length > 0) { + info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); + if (info->low_daddr >= eofs) + return 0; + } + + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); + + xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); + + /* + * Set up query parameters to return free rtextents covering the range + * we want. + */ + high = xfs_rtb_to_rtxup(mp, end_rtb); + error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb), + high, xfs_getfsmap_rtdev_rtbitmap_helper, info); + if (error) + goto err; + + /* + * Report any gaps at the end of the rtbitmap by simulating a null + * rmap starting at the block after the end of the query range. + */ + info->last = true; + ahigh.ar_startext = min(mp->m_sb.sb_rextents, high); + + error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info); + if (error) + goto err; +err: + xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); + return error; +} +#endif /* CONFIG_XFS_RT */ + /* Do we recognize the device? */ STATIC bool xfs_getfsmap_is_valid_device( @@ -890,7 +889,7 @@ xfs_getfsmap_check_keys( * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from * dkeys; used to query the metadata. */ -int +STATIC int xfs_getfsmap( struct xfs_mount *mp, struct xfs_fsmap_head *head, @@ -1020,3 +1019,133 @@ xfs_getfsmap( head->fmh_oflags = FMH_OF_DEV_T; return error; } + +int +xfs_ioc_getfsmap( + struct xfs_inode *ip, + struct fsmap_head __user *arg) +{ + struct xfs_fsmap_head xhead = {0}; + struct fsmap_head head; + struct fsmap *recs; + unsigned int count; + __u32 last_flags = 0; + bool done = false; + int error; + + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) + return -EFAULT; + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, + sizeof(head.fmh_keys[0].fmr_reserved)) || + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, + sizeof(head.fmh_keys[1].fmr_reserved))) + return -EINVAL; + + /* + * Use an internal memory buffer so that we don't have to copy fsmap + * data to userspace while holding locks. Start by trying to allocate + * up to 128k for the buffer, but fall back to a single page if needed. + */ + count = min_t(unsigned int, head.fmh_count, + 131072 / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) { + count = min_t(unsigned int, head.fmh_count, + PAGE_SIZE / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) + return -ENOMEM; + } + + xhead.fmh_iflags = head.fmh_iflags; + xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); + xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); + + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); + + head.fmh_entries = 0; + do { + struct fsmap __user *user_recs; + struct fsmap *last_rec; + + user_recs = &arg->fmh_recs[head.fmh_entries]; + xhead.fmh_entries = 0; + xhead.fmh_count = min_t(unsigned int, count, + head.fmh_count - head.fmh_entries); + + /* Run query, record how many entries we got. */ + error = xfs_getfsmap(ip->i_mount, &xhead, recs); + switch (error) { + case 0: + /* + * There are no more records in the result set. Copy + * whatever we got to userspace and break out. + */ + done = true; + break; + case -ECANCELED: + /* + * The internal memory buffer is full. Copy whatever + * records we got to userspace and go again if we have + * not yet filled the userspace buffer. + */ + error = 0; + break; + default: + goto out_free; + } + head.fmh_entries += xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + + /* + * If the caller wanted a record count or there aren't any + * new records to return, we're done. + */ + if (head.fmh_count == 0 || xhead.fmh_entries == 0) + break; + + /* Copy all the records we got out to userspace. */ + if (copy_to_user(user_recs, recs, + xhead.fmh_entries * sizeof(struct fsmap))) { + error = -EFAULT; + goto out_free; + } + + /* Remember the last record flags we copied to userspace. */ + last_rec = &recs[xhead.fmh_entries - 1]; + last_flags = last_rec->fmr_flags; + + /* Set up the low key for the next iteration. */ + xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + } while (!done && head.fmh_entries < head.fmh_count); + + /* + * If there are no more records in the query result set and we're not + * in counting mode, mark the last record returned with the LAST flag. + */ + if (done && head.fmh_count > 0 && head.fmh_entries > 0) { + struct fsmap __user *user_rec; + + last_flags |= FMR_OF_LAST; + user_rec = &arg->fmh_recs[head.fmh_entries - 1]; + + if (copy_to_user(&user_rec->fmr_flags, &last_flags, + sizeof(last_flags))) { + error = -EFAULT; + goto out_free; + } + } + + /* copy back header */ + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { + error = -EFAULT; + goto out_free; + } + +out_free: + kvfree(recs); + return error; +} diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h index a0775788e7b1..a0bcc38486a5 100644 --- a/fs/xfs/xfs_fsmap.h +++ b/fs/xfs/xfs_fsmap.h @@ -7,6 +7,7 @@ #define __XFS_FSMAP_H__ struct fsmap; +struct fsmap_head; /* internal fsmap representation */ struct xfs_fsmap { @@ -27,9 +28,6 @@ struct xfs_fsmap_head { struct xfs_fsmap fmh_keys[2]; /* low and high keys */ }; -void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src); - -int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head, - struct fsmap *out_recs); +int xfs_ioc_getfsmap(struct xfs_inode *ip, struct fsmap_head __user *arg); #endif /* __XFS_FSMAP_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c211ea2b63c4..3643cc843f62 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -485,7 +485,7 @@ xfs_do_force_shutdown( const char *why; - if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) { + if (xfs_set_shutdown(mp)) { xlog_shutdown_wait(mp->m_log); return; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index cf629302d48e..20d9924f28c2 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -65,6 +65,18 @@ static int xfs_icwalk_ag(struct xfs_perag *pag, XFS_ICWALK_FLAG_RECLAIM_SICK | \ XFS_ICWALK_FLAG_UNION) +/* Marks for the perag xarray */ +#define XFS_PERAG_RECLAIM_MARK XA_MARK_0 +#define XFS_PERAG_BLOCKGC_MARK XA_MARK_1 + +static inline xa_mark_t ici_tag_to_mark(unsigned int tag) +{ + if (tag == XFS_ICI_RECLAIM_TAG) + return XFS_PERAG_RECLAIM_MARK; + ASSERT(tag == XFS_ICI_BLOCKGC_TAG); + return XFS_PERAG_BLOCKGC_MARK; +} + /* * Allocate and initialise an xfs_inode. */ @@ -191,7 +203,7 @@ xfs_reclaim_work_queue( { rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + if (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) { queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); } @@ -241,9 +253,7 @@ xfs_perag_set_inode_tag( return; /* propagate the tag up into the perag radix tree */ - spin_lock(&mp->m_perag_lock); - radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag); - spin_unlock(&mp->m_perag_lock); + xa_set_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag)); /* start background work */ switch (tag) { @@ -285,14 +295,39 @@ xfs_perag_clear_inode_tag( return; /* clear the tag from the perag radix tree */ - spin_lock(&mp->m_perag_lock); - radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag); - spin_unlock(&mp->m_perag_lock); + xa_clear_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag)); trace_xfs_perag_clear_inode_tag(pag, _RET_IP_); } /* + * Find the next AG after @pag, or the first AG if @pag is NULL. + */ +static struct xfs_perag * +xfs_perag_grab_next_tag( + struct xfs_mount *mp, + struct xfs_perag *pag, + int tag) +{ + unsigned long index = 0; + + if (pag) { + index = pag->pag_agno + 1; + xfs_perag_rele(pag); + } + + rcu_read_lock(); + pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag)); + if (pag) { + trace_xfs_perag_grab_next_tag(pag, _RET_IP_); + if (!atomic_inc_not_zero(&pag->pag_active_ref)) + pag = NULL; + } + rcu_read_unlock(); + return pag; +} + +/* * When we recycle a reclaimable inode, we need to re-initialise the VFS inode * part of the structure. This is made more complex by the fact we store * information about the on-disk values in the VFS inode and so we can't just @@ -755,7 +790,7 @@ xfs_iget( ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); /* reject inode numbers outside existing AGs */ - if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) + if (!xfs_verify_ino(mp, ino)) return -EINVAL; XFS_STATS_INC(mp, xs_ig_attempts); @@ -977,7 +1012,7 @@ xfs_reclaim_inodes( if (xfs_want_reclaim_sick(mp)) icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK; - while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + while (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) { xfs_ail_push_all_sync(mp->m_ail); xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw); } @@ -1019,15 +1054,17 @@ long xfs_reclaim_inodes_count( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t ag = 0; + XA_STATE (xas, &mp->m_perags, 0); long reclaimable = 0; + struct xfs_perag *pag; - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - ag = pag->pag_agno + 1; + rcu_read_lock(); + xas_for_each_marked(&xas, pag, ULONG_MAX, XFS_PERAG_RECLAIM_MARK) { + trace_xfs_reclaim_inodes_count(pag, _THIS_IP_); reclaimable += pag->pag_ici_reclaimable; - xfs_perag_put(pag); } + rcu_read_unlock(); + return reclaimable; } @@ -1159,7 +1196,7 @@ xfs_inode_free_eofblocks( if (xfs_can_free_eofblocks(ip)) return xfs_free_eofblocks(ip); - /* inode could be preallocated or append-only */ + /* inode could be preallocated */ trace_xfs_inode_free_eofblocks_invalid(ip); xfs_inode_clear_eofblocks_tag(ip); return 0; @@ -1369,14 +1406,13 @@ void xfs_blockgc_start( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; if (xfs_set_blockgc_enabled(mp)) return; trace_xfs_blockgc_start(mp, __return_address); - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) xfs_blockgc_queue(pag); } @@ -1492,21 +1528,19 @@ int xfs_blockgc_flush_all( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; trace_xfs_blockgc_flush_all(mp, __return_address); /* - * For each blockgc worker, move its queue time up to now. If it - * wasn't queued, it will not be requeued. Then flush whatever's - * left. + * For each blockgc worker, move its queue time up to now. If it wasn't + * queued, it will not be requeued. Then flush whatever is left. */ - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) mod_delayed_work(pag->pag_mount->m_blockgc_wq, &pag->pag_blockgc_work, 0); - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) flush_delayed_work(&pag->pag_blockgc_work); return xfs_inodegc_flush(mp); @@ -1752,12 +1786,11 @@ xfs_icwalk( enum xfs_icwalk_goal goal, struct xfs_icwalk *icw) { - struct xfs_perag *pag; + struct xfs_perag *pag = NULL; int error = 0; int last_error = 0; - xfs_agnumber_t agno; - for_each_perag_tag(mp, agno, pag, goal) { + while ((pag = xfs_perag_grab_next_tag(mp, pag, goal))) { error = xfs_icwalk_ag(pag, goal, icw); if (error) { last_error = error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7dc6f326936c..bcc277fc0a83 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -704,7 +704,7 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); + error = xfs_dialloc(&tp, args, &ino); if (!error) error = xfs_icreate(tp, ino, args, &du.ip); if (error) @@ -812,7 +812,7 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); + error = xfs_dialloc(&tp, args, &ino); if (!error) error = xfs_icreate(tp, ino, args, &ip); if (error) @@ -1079,88 +1079,6 @@ out: return error; } -int -xfs_release( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - int error = 0; - - if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0)) - return 0; - - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) - return 0; - - if (!xfs_is_shutdown(mp)) { - int truncated; - - /* - * If we previously truncated this file and removed old data - * in the process, we want to initiate "early" writeout on - * the last close. This is an attempt to combat the notorious - * NULL files problem which is particularly noticeable from a - * truncate down, buffered (re-)write (delalloc), followed by - * a crash. What we are effectively doing here is - * significantly reducing the time window where we'd otherwise - * be exposed to that problem. - */ - truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated) { - xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); - if (ip->i_delayed_blks > 0) { - error = filemap_flush(VFS_I(ip)->i_mapping); - if (error) - return error; - } - } - } - - if (VFS_I(ip)->i_nlink == 0) - return 0; - - /* - * If we can't get the iolock just skip truncating the blocks past EOF - * because we could deadlock with the mmap_lock otherwise. We'll get - * another chance to drop them once the last reference to the inode is - * dropped, so we'll never leak blocks permanently. - */ - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) - return 0; - - if (xfs_can_free_eofblocks(ip)) { - /* - * Check if the inode is being opened, written and closed - * frequently and we have delayed allocation blocks outstanding - * (e.g. streaming writes from the NFS server), truncating the - * blocks past EOF will cause fragmentation to occur. - * - * In this case don't do the truncation, but we have to be - * careful how we detect this case. Blocks beyond EOF show up as - * i_delayed_blks even when the inode is clean, so we need to - * truncate them away first before checking for a dirty release. - * Hence on the first dirty close we will still remove the - * speculative allocation, but after that we will leave it in - * place. - */ - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) - goto out_unlock; - - error = xfs_free_eofblocks(ip); - if (error) - goto out_unlock; - - /* delalloc blocks after truncation means it really is dirty */ - if (ip->i_delayed_blks) - xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; -} - /* * Mark all the buffers attached to this directory stale. In theory we should * never be freeing a directory with any blocks at all, but this covers the diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 51defdebef30..97ed912306fd 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -276,12 +276,13 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } -static inline bool xfs_is_metadata_inode(struct xfs_inode *ip) +static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; - return ip == mp->m_rbmip || ip == mp->m_rsumip || - xfs_is_quota_inode(&mp->m_sb, ip->i_ino); + return ip->i_ino == mp->m_sb.sb_rbmino || + ip->i_ino == mp->m_sb.sb_rsumino || + xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } bool xfs_is_always_cow_inode(struct xfs_inode *ip); @@ -335,7 +336,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) #define XFS_INEW (1 << 3) /* inode has just been allocated */ #define XFS_IPRESERVE_DM_FIELDS (1 << 4) /* has legacy DMAPI fields set */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ -#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ +#define XFS_EOFBLOCKS_RELEASED (1 << 6) /* eofblocks were freed in ->release */ #define XFS_IFLUSHING (1 << 7) /* inode is being flushed */ #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) @@ -382,7 +383,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ + XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ XFS_INACTIVATING | XFS_IQUOTAUNCHECKED) /* @@ -512,7 +513,6 @@ enum layout_break_reason { #define XFS_INHERIT_GID(pip) \ (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID)) -int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6b13666d4e96..7226d27e8afc 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -876,136 +876,6 @@ out_free_buf: return error; } -STATIC int -xfs_ioc_getfsmap( - struct xfs_inode *ip, - struct fsmap_head __user *arg) -{ - struct xfs_fsmap_head xhead = {0}; - struct fsmap_head head; - struct fsmap *recs; - unsigned int count; - __u32 last_flags = 0; - bool done = false; - int error; - - if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) - return -EFAULT; - if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || - memchr_inv(head.fmh_keys[0].fmr_reserved, 0, - sizeof(head.fmh_keys[0].fmr_reserved)) || - memchr_inv(head.fmh_keys[1].fmr_reserved, 0, - sizeof(head.fmh_keys[1].fmr_reserved))) - return -EINVAL; - - /* - * Use an internal memory buffer so that we don't have to copy fsmap - * data to userspace while holding locks. Start by trying to allocate - * up to 128k for the buffer, but fall back to a single page if needed. - */ - count = min_t(unsigned int, head.fmh_count, - 131072 / sizeof(struct fsmap)); - recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); - if (!recs) { - count = min_t(unsigned int, head.fmh_count, - PAGE_SIZE / sizeof(struct fsmap)); - recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); - if (!recs) - return -ENOMEM; - } - - xhead.fmh_iflags = head.fmh_iflags; - xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); - xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); - - trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); - trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); - - head.fmh_entries = 0; - do { - struct fsmap __user *user_recs; - struct fsmap *last_rec; - - user_recs = &arg->fmh_recs[head.fmh_entries]; - xhead.fmh_entries = 0; - xhead.fmh_count = min_t(unsigned int, count, - head.fmh_count - head.fmh_entries); - - /* Run query, record how many entries we got. */ - error = xfs_getfsmap(ip->i_mount, &xhead, recs); - switch (error) { - case 0: - /* - * There are no more records in the result set. Copy - * whatever we got to userspace and break out. - */ - done = true; - break; - case -ECANCELED: - /* - * The internal memory buffer is full. Copy whatever - * records we got to userspace and go again if we have - * not yet filled the userspace buffer. - */ - error = 0; - break; - default: - goto out_free; - } - head.fmh_entries += xhead.fmh_entries; - head.fmh_oflags = xhead.fmh_oflags; - - /* - * If the caller wanted a record count or there aren't any - * new records to return, we're done. - */ - if (head.fmh_count == 0 || xhead.fmh_entries == 0) - break; - - /* Copy all the records we got out to userspace. */ - if (copy_to_user(user_recs, recs, - xhead.fmh_entries * sizeof(struct fsmap))) { - error = -EFAULT; - goto out_free; - } - - /* Remember the last record flags we copied to userspace. */ - last_rec = &recs[xhead.fmh_entries - 1]; - last_flags = last_rec->fmr_flags; - - /* Set up the low key for the next iteration. */ - xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); - trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); - } while (!done && head.fmh_entries < head.fmh_count); - - /* - * If there are no more records in the query result set and we're not - * in counting mode, mark the last record returned with the LAST flag. - */ - if (done && head.fmh_count > 0 && head.fmh_entries > 0) { - struct fsmap __user *user_rec; - - last_flags |= FMR_OF_LAST; - user_rec = &arg->fmh_recs[head.fmh_entries - 1]; - - if (copy_to_user(&user_rec->fmr_flags, &last_flags, - sizeof(last_flags))) { - error = -EFAULT; - goto out_free; - } - } - - /* copy back header */ - if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { - error = -EFAULT; - goto out_free; - } - -out_free: - kvfree(recs); - return error; -} - int xfs_ioc_swapext( xfs_swapext_t *sxp) @@ -1518,6 +1388,10 @@ xfs_file_ioctl( case XFS_IOC_EXCHANGE_RANGE: return xfs_ioc_exchange_range(filp, arg); + case XFS_IOC_START_COMMIT: + return xfs_ioc_start_commit(filp, arg); + case XFS_IOC_COMMIT_RANGE: + return xfs_ioc_commit_range(filp, arg); default: return -ENOTTY; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 817ea7e0a8ab..26b2f5887b88 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3495,7 +3495,7 @@ xlog_force_shutdown( * If this log shutdown also sets the mount shutdown state, issue a * shutdown warning message. */ - if (!test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &log->l_mp->m_opstate)) { + if (!xfs_set_shutdown(log->l_mp)) { xfs_alert_tag(log->l_mp, XFS_PTAG_SHUTDOWN_LOGERROR, "Filesystem has been shut down due to log error (0x%x).", shutdown_flags); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4423dd344239..1a74fe22672e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1336,7 +1336,7 @@ xlog_find_tail( * headers if we have a filesystem using non-persistent counters. */ if (clean) - set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); + xfs_set_clean(log->l_mp); /* * Make sure that there are no blocks in front of the head diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 09eef1721ef4..460f93a9ce00 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -595,7 +595,7 @@ xfs_unmount_flush_inodes( xfs_extent_busy_wait_all(mp); flush_workqueue(xfs_discard_wq); - set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate); + xfs_set_unmounting(mp); xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d0567dfbc036..96496f39f551 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -147,7 +147,7 @@ typedef struct xfs_mount { int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ - uint m_rsumsize; /* size of rt summary, bytes */ + xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_qflags; /* quota status flags */ uint64_t m_features; /* active filesystem features */ @@ -208,8 +208,7 @@ typedef struct xfs_mount { */ atomic64_t m_allocbt_blks; - struct radix_tree_root m_perag_tree; /* per-ag accounting info */ - spinlock_t m_perag_lock; /* lock for m_perag_tree */ + struct xarray m_perags; /* per-ag accounting info */ uint64_t m_resblks; /* total reserved blocks */ uint64_t m_resblks_avail;/* available reserved blocks */ uint64_t m_resblks_save; /* reserved blks @ remount,ro */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 7443debaffd6..d0f5b403bdbe 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -230,9 +230,8 @@ _xfs_mru_cache_clear_reap_list( __releases(mru->lock) __acquires(mru->lock) { struct xfs_mru_cache_elem *elem, *next; - struct list_head tmp; + LIST_HEAD(tmp); - INIT_LIST_HEAD(&tmp); list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) { /* Remove the element from the data store. */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 9490b913a4ab..7e2307921deb 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -799,7 +799,7 @@ xfs_qm_qino_alloc( }; xfs_ino_t ino; - error = xfs_dialloc(&tp, 0, S_IFREG, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (!error) error = xfs_icreate(tp, ino, &args, ipp); if (error) { @@ -1539,6 +1539,43 @@ xfs_qm_mount_quotas( } /* + * Load the inode for a given type of quota, assuming that the sb fields have + * been sorted out. This is not true when switching quota types on a V4 + * filesystem, so do not use this function for that. + * + * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on + * success; or a negative errno. + */ +int +xfs_qm_qino_load( + struct xfs_mount *mp, + xfs_dqtype_t type, + struct xfs_inode **ipp) +{ + xfs_ino_t ino = NULLFSINO; + + switch (type) { + case XFS_DQTYPE_USER: + ino = mp->m_sb.sb_uquotino; + break; + case XFS_DQTYPE_GROUP: + ino = mp->m_sb.sb_gquotino; + break; + case XFS_DQTYPE_PROJ: + ino = mp->m_sb.sb_pquotino; + break; + default: + ASSERT(0); + return -EFSCORRUPTED; + } + + if (ino == NULLFSINO) + return -ENOENT; + + return xfs_iget(mp, NULL, ino, 0, 0, ipp); +} + +/* * This is called after the superblock has been read in and we're ready to * iget the quota inodes. */ @@ -1561,24 +1598,21 @@ xfs_qm_init_quotainos( if (XFS_IS_UQUOTA_ON(mp) && mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_USER, &uip); if (error) return error; } if (XFS_IS_GQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_GROUP, &gip); if (error) goto error_rele; } if (XFS_IS_PQUOTA_ON(mp) && mp->m_sb.sb_pquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_pquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino, - 0, 0, &pip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_PROJ, &pip); if (error) goto error_rele; } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 6e09dfcd13e2..e919c7f62f57 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -184,4 +184,7 @@ xfs_get_defquota(struct xfs_quotainfo *qi, xfs_dqtype_t type) } } +int xfs_qm_qino_load(struct xfs_mount *mp, xfs_dqtype_t type, + struct xfs_inode **ipp); + #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 392cb39cc10c..4eda50ae2d1c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -53,16 +53,15 @@ xfs_qm_scall_quotaoff( STATIC int xfs_qm_scall_trunc_qfile( struct xfs_mount *mp, - xfs_ino_t ino) + xfs_dqtype_t type) { struct xfs_inode *ip; struct xfs_trans *tp; int error; - if (ino == NULLFSINO) + error = xfs_qm_qino_load(mp, type, &ip); + if (error == -ENOENT) return 0; - - error = xfs_iget(mp, NULL, ino, 0, 0, &ip); if (error) return error; @@ -113,17 +112,17 @@ xfs_qm_scall_trunc_qfiles( } if (flags & XFS_QMOPT_UQUOTA) { - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_USER); if (error) return error; } if (flags & XFS_QMOPT_GQUOTA) { - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_GROUP); if (error) return error; } if (flags & XFS_QMOPT_PQUOTA) - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_PROJ); return error; } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 9c162e69976b..4c7f7ce4fd2f 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -16,24 +16,25 @@ #include "xfs_qm.h" -static void +static int xfs_qm_fill_state( struct qc_type_state *tstate, struct xfs_mount *mp, - struct xfs_inode *ip, - xfs_ino_t ino, - struct xfs_def_quota *defq) + xfs_dqtype_t type) { - bool tempqip = false; - - tstate->ino = ino; - if (!ip && ino == NULLFSINO) - return; - if (!ip) { - if (xfs_iget(mp, NULL, ino, 0, 0, &ip)) - return; - tempqip = true; + struct xfs_inode *ip; + struct xfs_def_quota *defq; + int error; + + error = xfs_qm_qino_load(mp, type, &ip); + if (error) { + tstate->ino = NULLFSINO; + return error != -ENOENT ? error : 0; } + + defq = xfs_get_defquota(mp->m_quotainfo, type); + + tstate->ino = ip->i_ino; tstate->flags |= QCI_SYSFILE; tstate->blocks = ip->i_nblocks; tstate->nextents = ip->i_df.if_nextents; @@ -43,8 +44,9 @@ xfs_qm_fill_state( tstate->spc_warnlimit = 0; tstate->ino_warnlimit = 0; tstate->rt_spc_warnlimit = 0; - if (tempqip) - xfs_irele(ip); + xfs_irele(ip); + + return 0; } /* @@ -56,8 +58,9 @@ xfs_fs_get_quota_state( struct super_block *sb, struct qc_state *state) { - struct xfs_mount *mp = XFS_M(sb); - struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_mount *mp = XFS_M(sb); + struct xfs_quotainfo *q = mp->m_quotainfo; + int error; memset(state, 0, sizeof(*state)); if (!XFS_IS_QUOTA_ON(mp)) @@ -76,12 +79,18 @@ xfs_fs_get_quota_state( if (XFS_IS_PQUOTA_ENFORCED(mp)) state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED; - xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip, - mp->m_sb.sb_uquotino, &q->qi_usr_default); - xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip, - mp->m_sb.sb_gquotino, &q->qi_grp_default); - xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip, - mp->m_sb.sb_pquotino, &q->qi_prj_default); + error = xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, + XFS_DQTYPE_USER); + if (error) + return error; + error = xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, + XFS_DQTYPE_GROUP); + if (error) + return error; + error = xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, + XFS_DQTYPE_PROJ); + if (error) + return error; return 0; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ebeab8e4dab1..3a2005a1e673 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -142,7 +142,7 @@ xfs_rtallocate_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(args, start, 0, &preblock); + error = xfs_rtfind_back(args, start, &preblock); if (error) return error; @@ -194,6 +194,17 @@ xfs_rtallocate_range( return xfs_rtmodify_range(args, start, len, 0); } +/* Reduce @rtxlen until it is a multiple of @prod. */ +static inline xfs_rtxlen_t +xfs_rtalloc_align_len( + xfs_rtxlen_t rtxlen, + xfs_rtxlen_t prod) +{ + if (unlikely(prod > 1)) + return rounddown(rtxlen, prod); + return rtxlen; +} + /* * Make sure we don't run off the end of the rt volume. Be careful that * adjusting maxlen downwards doesn't cause us to fail the alignment checks. @@ -208,7 +219,7 @@ xfs_rtallocate_clamp_len( xfs_rtxlen_t ret; ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; - return rounddown(ret, prod); + return xfs_rtalloc_align_len(ret, prod); } /* @@ -229,39 +240,40 @@ xfs_rtallocate_extent_block( xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { struct xfs_mount *mp = args->mp; - xfs_rtxnum_t besti; /* best rtext found so far */ - xfs_rtxnum_t bestlen;/* best length found so far */ + xfs_rtxnum_t besti = -1; /* best rtext found so far */ xfs_rtxnum_t end; /* last rtext in chunk */ - int error; xfs_rtxnum_t i; /* current rtext trying */ xfs_rtxnum_t next; /* next rtext to try */ + xfs_rtxlen_t scanlen; /* number of free rtx to look for */ + xfs_rtxlen_t bestlen = 0; /* best length found so far */ int stat; /* status from internal calls */ + int error; /* - * Loop over all the extents starting in this bitmap block, - * looking for one that's long enough. + * Loop over all the extents starting in this bitmap block up to the + * end of the rt volume, looking for one that's long enough. */ - for (i = xfs_rbmblock_to_rtx(mp, bbno), besti = -1, bestlen = 0, - end = xfs_rbmblock_to_rtx(mp, bbno + 1) - 1; - i <= end; - i++) { + end = min(mp->m_sb.sb_rextents, xfs_rbmblock_to_rtx(mp, bbno + 1)) - 1; + for (i = xfs_rbmblock_to_rtx(mp, bbno); i <= end; i++) { /* Make sure we don't scan off the end of the rt volume. */ - maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + scanlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + if (scanlen < minlen) + break; /* - * See if there's a free extent of maxlen starting at i. + * See if there's a free extent of scanlen starting at i. * If it's not so then next will contain the first non-free. */ - error = xfs_rtcheck_range(args, i, maxlen, 1, &next, &stat); + error = xfs_rtcheck_range(args, i, scanlen, 1, &next, &stat); if (error) return error; if (stat) { /* - * i for maxlen is all free, allocate and return that. + * i to scanlen is all free, allocate and return that. */ - bestlen = maxlen; - besti = i; - goto allocate; + *len = scanlen; + *rtx = i; + return 0; } /* @@ -289,38 +301,28 @@ xfs_rtallocate_extent_block( return error; } - /* - * Searched the whole thing & didn't find a maxlen free extent. - */ - if (minlen > maxlen || besti == -1) { - /* - * Allocation failed. Set *nextp to the next block to try. - */ - *nextp = next; - return -ENOSPC; - } + /* Searched the whole thing & didn't find a maxlen free extent. */ + if (besti == -1) + goto nospace; /* - * If size should be a multiple of prod, make that so. + * Ensure bestlen is a multiple of prod, but don't return a too-short + * extent. */ - if (prod > 1) { - xfs_rtxlen_t p; /* amount to trim length by */ - - div_u64_rem(bestlen, prod, &p); - if (p) - bestlen -= p; - } + bestlen = xfs_rtalloc_align_len(bestlen, prod); + if (bestlen < minlen) + goto nospace; /* - * Allocate besti for bestlen & return that. + * Pick besti for bestlen & return that. */ -allocate: - error = xfs_rtallocate_range(args, besti, bestlen); - if (error) - return error; *len = bestlen; *rtx = besti; return 0; +nospace: + /* Allocation failed. Set *nextp to the next block to try. */ + *nextp = next; + return -ENOSPC; } /* @@ -339,45 +341,46 @@ xfs_rtallocate_extent_exact( xfs_rtxlen_t prod, /* extent product factor */ xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; - xfs_rtxlen_t i; /* extent length trimmed due to prod */ - int isfree; /* extent is free */ + struct xfs_mount *mp = args->mp; xfs_rtxnum_t next; /* next rtext to try (dummy) */ + xfs_rtxlen_t alloclen; /* candidate length */ + xfs_rtxlen_t scanlen; /* number of free rtx to look for */ + int isfree; /* extent is free */ + int error; ASSERT(minlen % prod == 0); ASSERT(maxlen % prod == 0); - /* - * Check if the range in question (for maxlen) is free. - */ - error = xfs_rtcheck_range(args, start, maxlen, 1, &next, &isfree); + + /* Make sure we don't run off the end of the rt volume. */ + scanlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); + if (scanlen < minlen) + return -ENOSPC; + + /* Check if the range in question (for scanlen) is free. */ + error = xfs_rtcheck_range(args, start, scanlen, 1, &next, &isfree); if (error) return error; - if (!isfree) { - /* - * If not, allocate what there is, if it's at least minlen. - */ - maxlen = next - start; - if (maxlen < minlen) - return -ENOSPC; - - /* - * Trim off tail of extent, if prod is specified. - */ - if (prod > 1 && (i = maxlen % prod)) { - maxlen -= i; - if (maxlen < minlen) - return -ENOSPC; - } + if (isfree) { + /* start to scanlen is all free; allocate it. */ + *len = scanlen; + *rtx = start; + return 0; } /* - * Allocate what we can and return it. + * If not, allocate what there is, if it's at least minlen. */ - error = xfs_rtallocate_range(args, start, maxlen); - if (error) - return error; - *len = maxlen; + alloclen = next - start; + if (alloclen < minlen) + return -ENOSPC; + + /* Ensure alloclen is a multiple of prod. */ + alloclen = xfs_rtalloc_align_len(alloclen, prod); + if (alloclen < minlen) + return -ENOSPC; + + *len = alloclen; *rtx = start; return 0; } @@ -416,11 +419,6 @@ xfs_rtallocate_extent_near( if (start >= mp->m_sb.sb_rextents) start = mp->m_sb.sb_rextents - 1; - /* Make sure we don't run off the end of the rt volume. */ - maxlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); - if (maxlen < minlen) - return -ENOSPC; - /* * Try the exact allocation first. */ @@ -429,7 +427,6 @@ xfs_rtallocate_extent_near( if (error != -ENOSPC) return error; - bbno = xfs_rtx_to_rbmblock(mp, start); i = 0; j = -1; @@ -552,11 +549,11 @@ xfs_rtalloc_sumlevel( xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { xfs_fileoff_t i; /* bitmap block number */ + int error; for (i = 0; i < args->mp->m_sb.sb_rbmblocks; i++) { xfs_suminfo_t sum; /* summary information for extents */ xfs_rtxnum_t n; /* next rtext to be tried */ - int error; error = xfs_rtget_summary(args, l, i, &sum); if (error) @@ -652,136 +649,20 @@ xfs_rtallocate_extent_size( return -ENOSPC; } -/* - * Allocate space to the bitmap or summary file, and zero it, for growfs. - */ -STATIC int -xfs_growfs_rt_alloc( - struct xfs_mount *mp, /* file system mount point */ - xfs_extlen_t oblocks, /* old count of blocks */ - xfs_extlen_t nblocks, /* new count of blocks */ - struct xfs_inode *ip) /* inode (bitmap/summary) */ -{ - xfs_fileoff_t bno; /* block number in file */ - struct xfs_buf *bp; /* temporary buffer for zeroing */ - xfs_daddr_t d; /* disk block address */ - int error; /* error return value */ - xfs_fsblock_t fsbno; /* filesystem block for bno */ - struct xfs_bmbt_irec map; /* block map output */ - int nmap; /* number of block maps */ - int resblks; /* space reservation */ - enum xfs_blft buf_type; - struct xfs_trans *tp; - - if (ip == mp->m_rsumip) - buf_type = XFS_BLFT_RTSUMMARY_BUF; - else - buf_type = XFS_BLFT_RTBITMAP_BUF; - - /* - * Allocate space to the file, as necessary. - */ - while (oblocks < nblocks) { - resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); - /* - * Reserve space & log for one extent added to the file. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks, - 0, 0, &tp); - if (error) - return error; - /* - * Lock the inode. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, - XFS_IEXT_ADD_NOSPLIT_CNT); - if (error) - goto out_trans_cancel; - - /* - * Allocate blocks to the bitmap file. - */ - nmap = 1; - error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, 0, &map, &nmap); - if (error) - goto out_trans_cancel; - /* - * Free any blocks freed up in the transaction, then commit. - */ - error = xfs_trans_commit(tp); - if (error) - return error; - /* - * Now we need to clear the allocated blocks. - * Do this one block per transaction, to keep it simple. - */ - for (bno = map.br_startoff, fsbno = map.br_startblock; - bno < map.br_startoff + map.br_blockcount; - bno++, fsbno++) { - /* - * Reserve log for one block zeroing. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, - 0, 0, 0, &tp); - if (error) - return error; - /* - * Lock the bitmap inode. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - /* - * Get a buffer for the block. - */ - d = XFS_FSB_TO_DADDR(mp, fsbno); - error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize, 0, &bp); - if (error) - goto out_trans_cancel; - - xfs_trans_buf_set_type(tp, bp, buf_type); - bp->b_ops = &xfs_rtbuf_ops; - memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); - xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); - /* - * Commit the transaction. - */ - error = xfs_trans_commit(tp); - if (error) - return error; - } - /* - * Go on to the next extent, if any. - */ - oblocks = map.br_startoff + map.br_blockcount; - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp); - return error; -} - -static void +static int xfs_alloc_rsum_cache( - xfs_mount_t *mp, /* file system mount structure */ - xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */ + struct xfs_mount *mp, + xfs_extlen_t rbmblocks) { /* * The rsum cache is initialized to the maximum value, which is * trivially an upper bound on the maximum level with any free extents. - * We can continue without the cache if it couldn't be allocated. */ mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL); - if (mp->m_rsum_cache) - memset(mp->m_rsum_cache, -1, rbmblocks); - else - xfs_warn(mp, "could not allocate realtime summary cache"); + if (!mp->m_rsum_cache) + return -ENOMEM; + memset(mp->m_rsum_cache, -1, rbmblocks); + return 0; } /* @@ -817,9 +698,168 @@ out_iolock: return error; } +static int +xfs_growfs_rt_bmblock( + struct xfs_mount *mp, + xfs_rfsblock_t nrblocks, + xfs_agblock_t rextsize, + xfs_fileoff_t bmbno) +{ + struct xfs_inode *rbmip = mp->m_rbmip; + struct xfs_inode *rsumip = mp->m_rsumip; + struct xfs_rtalloc_args args = { + .mp = mp, + }; + struct xfs_rtalloc_args nargs = { + }; + struct xfs_mount *nmp; + xfs_rfsblock_t nrblocks_step; + xfs_rtbxlen_t freed_rtx; + int error; + + + nrblocks_step = (bmbno + 1) * NBBY * mp->m_sb.sb_blocksize * rextsize; + + nmp = nargs.mp = kmemdup(mp, sizeof(*mp), GFP_KERNEL); + if (!nmp) + return -ENOMEM; + + /* + * Calculate new sb and mount fields for this round. + */ + nmp->m_sb.sb_rextsize = rextsize; + xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb); + nmp->m_sb.sb_rbmblocks = bmbno + 1; + nmp->m_sb.sb_rblocks = min(nrblocks, nrblocks_step); + nmp->m_sb.sb_rextents = xfs_rtb_to_rtx(nmp, nmp->m_sb.sb_rblocks); + nmp->m_sb.sb_rextslog = xfs_compute_rextslog(nmp->m_sb.sb_rextents); + nmp->m_rsumlevels = nmp->m_sb.sb_rextslog + 1; + nmp->m_rsumblocks = xfs_rtsummary_blockcount(mp, nmp->m_rsumlevels, + nmp->m_sb.sb_rbmblocks); + + /* + * Recompute the growfsrt reservation from the new rsumsize, so that the + * transaction below use the new, potentially larger value. + * */ + xfs_trans_resv_calc(nmp, &nmp->m_resv); + error = xfs_trans_alloc(mp, &M_RES(nmp)->tr_growrtfree, 0, 0, 0, + &args.tp); + if (error) + goto out_free; + nargs.tp = args.tp; + + xfs_rtbitmap_lock(mp); + xfs_rtbitmap_trans_join(args.tp); + + /* + * Update the bitmap inode's size ondisk and incore. We need to update + * the incore size so that inode inactivation won't punch what it thinks + * are "posteof" blocks. + */ + rbmip->i_disk_size = nmp->m_sb.sb_rbmblocks * nmp->m_sb.sb_blocksize; + i_size_write(VFS_I(rbmip), rbmip->i_disk_size); + xfs_trans_log_inode(args.tp, rbmip, XFS_ILOG_CORE); + + /* + * Update the summary inode's size. We need to update the incore size + * so that inode inactivation won't punch what it thinks are "posteof" + * blocks. + */ + rsumip->i_disk_size = nmp->m_rsumblocks * nmp->m_sb.sb_blocksize; + i_size_write(VFS_I(rsumip), rsumip->i_disk_size); + xfs_trans_log_inode(args.tp, rsumip, XFS_ILOG_CORE); + + /* + * Copy summary data from old to new sizes when the real size (not + * block-aligned) changes. + */ + if (mp->m_sb.sb_rbmblocks != nmp->m_sb.sb_rbmblocks || + mp->m_rsumlevels != nmp->m_rsumlevels) { + error = xfs_rtcopy_summary(&args, &nargs); + if (error) + goto out_cancel; + } + + /* + * Update superblock fields. + */ + if (nmp->m_sb.sb_rextsize != mp->m_sb.sb_rextsize) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSIZE, + nmp->m_sb.sb_rextsize - mp->m_sb.sb_rextsize); + if (nmp->m_sb.sb_rbmblocks != mp->m_sb.sb_rbmblocks) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RBMBLOCKS, + nmp->m_sb.sb_rbmblocks - mp->m_sb.sb_rbmblocks); + if (nmp->m_sb.sb_rblocks != mp->m_sb.sb_rblocks) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RBLOCKS, + nmp->m_sb.sb_rblocks - mp->m_sb.sb_rblocks); + if (nmp->m_sb.sb_rextents != mp->m_sb.sb_rextents) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTENTS, + nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents); + if (nmp->m_sb.sb_rextslog != mp->m_sb.sb_rextslog) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSLOG, + nmp->m_sb.sb_rextslog - mp->m_sb.sb_rextslog); + + /* + * Free the new extent. + */ + freed_rtx = nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents; + error = xfs_rtfree_range(&nargs, mp->m_sb.sb_rextents, freed_rtx); + xfs_rtbuf_cache_relse(&nargs); + if (error) + goto out_cancel; + + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_FREXTENTS, freed_rtx); + + /* + * Update the calculated values in the real mount structure. + */ + mp->m_rsumlevels = nmp->m_rsumlevels; + mp->m_rsumblocks = nmp->m_rsumblocks; + xfs_mount_sb_set_rextsize(mp, &mp->m_sb); + + /* + * Recompute the growfsrt reservation from the new rsumsize. + */ + xfs_trans_resv_calc(mp, &mp->m_resv); + + error = xfs_trans_commit(args.tp); + if (error) + goto out_free; + + /* + * Ensure the mount RT feature flag is now set. + */ + mp->m_features |= XFS_FEAT_REALTIME; + + kfree(nmp); + return 0; + +out_cancel: + xfs_trans_cancel(args.tp); +out_free: + kfree(nmp); + return error; +} + /* - * Visible (exported) functions. + * Calculate the last rbmblock currently used. + * + * This also deals with the case where there were no rtextents before. */ +static xfs_fileoff_t +xfs_last_rt_bmblock( + struct xfs_mount *mp) +{ + xfs_fileoff_t bmbno = mp->m_sb.sb_rbmblocks; + + /* Skip the current block if it is exactly full. */ + if (xfs_rtx_to_rbmword(mp, mp->m_sb.sb_rextents) != 0) + bmbno--; + return bmbno; +} /* * Grow the realtime area of the filesystem. @@ -832,23 +872,14 @@ xfs_growfs_rt( xfs_fileoff_t bmbno; /* bitmap block number */ struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ - xfs_mount_t *nmp; /* new (fake) mount structure */ - xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ xfs_rtxnum_t nrextents; /* new number of realtime extents */ - uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ - uint nrsumlevels; /* new rt summary levels */ - uint nrsumsize; /* new size of rt summary, bytes */ - xfs_sb_t *nsbp; /* new superblock */ xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */ xfs_extlen_t rsumblocks; /* current number of rt summary blks */ - xfs_sb_t *sbp; /* old superblock */ uint8_t *rsum_cache; /* old summary cache */ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize; - sbp = &mp->m_sb; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -867,11 +898,10 @@ xfs_growfs_rt( goto out_unlock; /* Shrink not supported. */ - if (in->newblocks <= sbp->sb_rblocks) + if (in->newblocks <= mp->m_sb.sb_rblocks) goto out_unlock; - /* Can only change rt extent size when adding rt volume. */ - if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) + if (mp->m_sb.sb_rblocks > 0 && in->extsize != mp->m_sb.sb_rextsize) goto out_unlock; /* Range check the extent size. */ @@ -884,15 +914,14 @@ xfs_growfs_rt( if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) goto out_unlock; - nrblocks = in->newblocks; - error = xfs_sb_validate_fsb_count(sbp, nrblocks); + error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks); if (error) goto out_unlock; /* * Read in the last block of the device, make sure it exists. */ error = xfs_buf_read_uncached(mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, nrblocks - 1), + XFS_FSB_TO_BB(mp, in->newblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); if (error) goto out_unlock; @@ -901,17 +930,15 @@ xfs_growfs_rt( /* * Calculate new parameters. These are the final values to be reached. */ - nrextents = nrblocks; - do_div(nrextents, in->extsize); - if (!xfs_validate_rtextents(nrextents)) { + nrextents = div_u64(in->newblocks, in->extsize); + if (nrextents == 0) { error = -EINVAL; goto out_unlock; } nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); - nrextslog = xfs_compute_rextslog(nrextents); - nrsumlevels = nrextslog + 1; - nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks); - nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); + nrsumblocks = xfs_rtsummary_blockcount(mp, + xfs_compute_rextslog(nrextents) + 1, nrbmblocks); + /* * New summary size can't be more than half the size of * the log. This prevents us from getting a log overflow, @@ -931,154 +958,29 @@ xfs_growfs_rt( /* * Allocate space to the bitmap and summary files, as necessary. */ - error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); + error = xfs_rtfile_initialize_blocks(mp->m_rbmip, rbmblocks, + nrbmblocks, NULL); if (error) goto out_unlock; - error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); + error = xfs_rtfile_initialize_blocks(mp->m_rsumip, rsumblocks, + nrsumblocks, NULL); if (error) goto out_unlock; rsum_cache = mp->m_rsum_cache; - if (nrbmblocks != sbp->sb_rbmblocks) - xfs_alloc_rsum_cache(mp, nrbmblocks); - - /* - * Allocate a new (fake) mount/sb. - */ - nmp = kmalloc(sizeof(*nmp), GFP_KERNEL | __GFP_NOFAIL); - /* - * Loop over the bitmap blocks. - * We will do everything one bitmap block at a time. - * Skip the current block if it is exactly full. - * This also deals with the case where there were no rtextents before. - */ - for (bmbno = sbp->sb_rbmblocks - - ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); - bmbno < nrbmblocks; - bmbno++) { - struct xfs_rtalloc_args args = { - .mp = mp, - }; - struct xfs_rtalloc_args nargs = { - .mp = nmp, - }; - struct xfs_trans *tp; - xfs_rfsblock_t nrblocks_step; - - *nmp = *mp; - nsbp = &nmp->m_sb; - /* - * Calculate new sb and mount fields for this round. - */ - nsbp->sb_rextsize = in->extsize; - nmp->m_rtxblklog = -1; /* don't use shift or masking */ - nsbp->sb_rbmblocks = bmbno + 1; - nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * - nsbp->sb_rextsize; - nsbp->sb_rblocks = min(nrblocks, nrblocks_step); - nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks); - ASSERT(nsbp->sb_rextents != 0); - nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents); - nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; - nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, - nsbp->sb_rbmblocks); - nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); - /* recompute growfsrt reservation from new rsumsize */ - xfs_trans_resv_calc(nmp, &nmp->m_resv); - - /* - * Start a transaction, get the log reservation. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0, - &tp); + if (nrbmblocks != mp->m_sb.sb_rbmblocks) { + error = xfs_alloc_rsum_cache(mp, nrbmblocks); if (error) - break; - args.tp = tp; - nargs.tp = tp; - - /* - * Lock out other callers by grabbing the bitmap and summary - * inode locks and joining them to the transaction. - */ - xfs_rtbitmap_lock(tp, mp); - /* - * Update the bitmap inode's size ondisk and incore. We need - * to update the incore size so that inode inactivation won't - * punch what it thinks are "posteof" blocks. - */ - mp->m_rbmip->i_disk_size = - nsbp->sb_rbmblocks * nsbp->sb_blocksize; - i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_disk_size); - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - /* - * Update the summary inode's size. We need to update the - * incore size so that inode inactivation won't punch what it - * thinks are "posteof" blocks. - */ - mp->m_rsumip->i_disk_size = nmp->m_rsumsize; - i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_disk_size); - xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); - /* - * Copy summary data from old to new sizes. - * Do this when the real size (not block-aligned) changes. - */ - if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks || - mp->m_rsumlevels != nmp->m_rsumlevels) { - error = xfs_rtcopy_summary(&args, &nargs); - if (error) - goto error_cancel; - } - /* - * Update superblock fields. - */ - if (nsbp->sb_rextsize != sbp->sb_rextsize) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSIZE, - nsbp->sb_rextsize - sbp->sb_rextsize); - if (nsbp->sb_rbmblocks != sbp->sb_rbmblocks) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS, - nsbp->sb_rbmblocks - sbp->sb_rbmblocks); - if (nsbp->sb_rblocks != sbp->sb_rblocks) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBLOCKS, - nsbp->sb_rblocks - sbp->sb_rblocks); - if (nsbp->sb_rextents != sbp->sb_rextents) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS, - nsbp->sb_rextents - sbp->sb_rextents); - if (nsbp->sb_rextslog != sbp->sb_rextslog) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG, - nsbp->sb_rextslog - sbp->sb_rextslog); - /* - * Free new extent. - */ - error = xfs_rtfree_range(&nargs, sbp->sb_rextents, - nsbp->sb_rextents - sbp->sb_rextents); - xfs_rtbuf_cache_relse(&nargs); - if (error) { -error_cancel: - xfs_trans_cancel(tp); - break; - } - /* - * Mark more blocks free in the superblock. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, - nsbp->sb_rextents - sbp->sb_rextents); - /* - * Update mp values into the real mp structure. - */ - mp->m_rsumlevels = nrsumlevels; - mp->m_rsumsize = nrsumsize; - /* recompute growfsrt reservation from new rsumsize */ - xfs_trans_resv_calc(mp, &mp->m_resv); + goto out_unlock; + } - error = xfs_trans_commit(tp); + /* Initialize the free space bitmap one bitmap block at a time. */ + for (bmbno = xfs_last_rt_bmblock(mp); bmbno < nrbmblocks; bmbno++) { + error = xfs_growfs_rt_bmblock(mp, in->newblocks, in->extsize, + bmbno); if (error) - break; - - /* Ensure the mount RT feature flag is now set. */ - mp->m_features |= XFS_FEAT_REALTIME; + goto out_free; } - if (error) - goto out_free; if (old_rextsize != in->extsize) { error = xfs_growfs_rt_fixup_extsize(mp); @@ -1091,11 +993,6 @@ error_cancel: out_free: /* - * Free the fake mp structure. - */ - kfree(nmp); - - /* * If we had to allocate a new rsum_cache, we either need to free the * old one (if we succeeded) or free the new one and restore the old one * (if there was an error). @@ -1124,7 +1021,6 @@ xfs_rtmount_init( struct xfs_buf *bp; /* buffer for last block of subvolume */ struct xfs_sb *sbp; /* filesystem superblock copy in mount */ xfs_daddr_t d; /* address of last block of subvolume */ - unsigned int rsumblocks; int error; sbp = &mp->m_sb; @@ -1136,9 +1032,8 @@ xfs_rtmount_init( return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; - rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, + mp->m_rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, mp->m_sb.sb_rbmblocks); - mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks); mp->m_rbmip = mp->m_rsumip = NULL; /* * Check that the realtime section is an ok size. @@ -1268,7 +1163,9 @@ xfs_rtmount_inodes( if (error) goto out_rele_summary; - xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); + error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); + if (error) + goto out_rele_summary; return 0; out_rele_summary: @@ -1296,12 +1193,11 @@ xfs_rtunmount_inodes( * of rtextents and the fraction. * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ... */ -static int +static xfs_rtxnum_t xfs_rtpick_extent( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_rtxlen_t len, /* allocation length (rtextents) */ - xfs_rtxnum_t *pick) /* result rt extent */ + xfs_rtxlen_t len) /* allocation length (rtextents) */ { xfs_rtxnum_t b; /* result rtext */ int log2; /* log of sequence number */ @@ -1332,8 +1228,7 @@ xfs_rtpick_extent( ts.tv_sec = seq + 1; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - *pick = b; - return 0; + return b; } static void @@ -1365,36 +1260,109 @@ xfs_rtalloc_align_minmax( *raminlen = newminlen; } -int -xfs_bmap_rtalloc( - struct xfs_bmalloca *ap) +static int +xfs_rtallocate( + struct xfs_trans *tp, + xfs_rtblock_t bno_hint, + xfs_rtxlen_t minlen, + xfs_rtxlen_t maxlen, + xfs_rtxlen_t prod, + bool wasdel, + bool initial_user_data, + bool *rtlocked, + xfs_rtblock_t *bno, + xfs_extlen_t *blen) +{ + struct xfs_rtalloc_args args = { + .mp = tp->t_mountp, + .tp = tp, + }; + xfs_rtxnum_t start = 0; + xfs_rtxnum_t rtx; + xfs_rtxlen_t len = 0; + int error = 0; + + /* + * Lock out modifications to both the RT bitmap and summary inodes. + */ + if (!*rtlocked) { + xfs_rtbitmap_lock(args.mp); + xfs_rtbitmap_trans_join(tp); + *rtlocked = true; + } + + /* + * For an allocation to an empty file at offset 0, pick an extent that + * will space things out in the rt area. + */ + if (bno_hint) + start = xfs_rtb_to_rtx(args.mp, bno_hint); + else if (initial_user_data) + start = xfs_rtpick_extent(args.mp, tp, maxlen); + + if (start) { + error = xfs_rtallocate_extent_near(&args, start, minlen, maxlen, + &len, prod, &rtx); + /* + * If we can't allocate near a specific rt extent, try again + * without locality criteria. + */ + if (error == -ENOSPC) { + xfs_rtbuf_cache_relse(&args); + error = 0; + } + } + + if (!error) { + error = xfs_rtallocate_extent_size(&args, minlen, maxlen, &len, + prod, &rtx); + } + + if (error) + goto out_release; + + error = xfs_rtallocate_range(&args, rtx, len); + if (error) + goto out_release; + + xfs_trans_mod_sb(tp, wasdel ? + XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS, + -(long)len); + *bno = xfs_rtx_to_rtb(args.mp, rtx); + *blen = xfs_rtxlen_to_extlen(args.mp, len); + +out_release: + xfs_rtbuf_cache_relse(&args); + return error; +} + +static int +xfs_rtallocate_align( + struct xfs_bmalloca *ap, + xfs_rtxlen_t *ralen, + xfs_rtxlen_t *raminlen, + xfs_rtxlen_t *prod, + bool *noalign) { struct xfs_mount *mp = ap->ip->i_mount; xfs_fileoff_t orig_offset = ap->offset; - xfs_rtxnum_t start; /* allocation hint rtextent no */ - xfs_rtxnum_t rtx; /* actually allocated rtextent no */ - xfs_rtxlen_t prod = 0; /* product factor for allocators */ - xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_rtxlen_t ralen = 0; /* realtime allocation length */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_extlen_t orig_length = ap->length; xfs_extlen_t minlen = mp->m_sb.sb_rextsize; - xfs_rtxlen_t raminlen; - bool rtlocked = false; - bool ignore_locality = false; - struct xfs_rtalloc_args args = { - .mp = mp, - .tp = ap->tp, - }; + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_extlen_t mod; /* product factor for allocators */ int error; - align = xfs_get_extsz_hint(ap->ip); - if (!align) - align = 1; -retry: - error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, - align, 1, ap->eof, 0, - ap->conv, &ap->offset, &ap->length); + if (*noalign) { + align = mp->m_sb.sb_rextsize; + } else { + align = xfs_get_extsz_hint(ap->ip); + if (!align) + align = 1; + if (align == mp->m_sb.sb_rextsize) + *noalign = true; + } + + error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, + ap->eof, 0, ap->conv, &ap->offset, &ap->length); if (error) return error; ASSERT(ap->length); @@ -1418,59 +1386,55 @@ retry: * XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ - ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); - raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); - ASSERT(raminlen > 0); - ASSERT(raminlen <= ralen); - - /* - * Lock out modifications to both the RT bitmap and summary inodes - */ - if (!rtlocked) { - xfs_rtbitmap_lock(ap->tp, mp); - rtlocked = true; - } - - if (ignore_locality) { - start = 0; - } else if (xfs_bmap_adjacent(ap)) { - start = xfs_rtb_to_rtx(mp, ap->blkno); - } else if (ap->datatype & XFS_ALLOC_INITIAL_USER_DATA) { - /* - * If it's an allocation to an empty file at offset 0, pick an - * extent that will space things out in the rt area. - */ - error = xfs_rtpick_extent(mp, ap->tp, ralen, &start); - if (error) - return error; - } else { - start = 0; - } + *ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); + *raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); + ASSERT(*raminlen > 0); + ASSERT(*raminlen <= *ralen); /* * Only bother calculating a real prod factor if offset & length are * perfectly aligned, otherwise it will just get us in trouble. */ div_u64_rem(ap->offset, align, &mod); - if (mod || ap->length % align) { - prod = 1; - } else { - prod = xfs_extlen_to_rtxlen(mp, align); - if (prod > 1) - xfs_rtalloc_align_minmax(&raminlen, &ralen, &prod); - } + if (mod || ap->length % align) + *prod = 1; + else + *prod = xfs_extlen_to_rtxlen(mp, align); - if (start) { - error = xfs_rtallocate_extent_near(&args, start, raminlen, - ralen, &ralen, prod, &rtx); - } else { - error = xfs_rtallocate_extent_size(&args, raminlen, - ralen, &ralen, prod, &rtx); - } - xfs_rtbuf_cache_relse(&args); + if (*prod > 1) + xfs_rtalloc_align_minmax(raminlen, ralen, prod); + return 0; +} +int +xfs_bmap_rtalloc( + struct xfs_bmalloca *ap) +{ + xfs_fileoff_t orig_offset = ap->offset; + xfs_rtxlen_t prod = 0; /* product factor for allocators */ + xfs_rtxlen_t ralen = 0; /* realtime allocation length */ + xfs_rtblock_t bno_hint = NULLRTBLOCK; + xfs_extlen_t orig_length = ap->length; + xfs_rtxlen_t raminlen; + bool rtlocked = false; + bool noalign = false; + bool initial_user_data = + ap->datatype & XFS_ALLOC_INITIAL_USER_DATA; + int error; + +retry: + error = xfs_rtallocate_align(ap, &ralen, &raminlen, &prod, &noalign); + if (error) + return error; + + if (xfs_bmap_adjacent(ap)) + bno_hint = ap->blkno; + + error = xfs_rtallocate(ap->tp, bno_hint, raminlen, ralen, prod, + ap->wasdel, initial_user_data, &rtlocked, + &ap->blkno, &ap->length); if (error == -ENOSPC) { - if (align > mp->m_sb.sb_rextsize) { + if (!noalign) { /* * We previously enlarged the request length to try to * satisfy an extent size hint. The allocator didn't @@ -1480,16 +1444,7 @@ retry: */ ap->offset = orig_offset; ap->length = orig_length; - minlen = align = mp->m_sb.sb_rextsize; - goto retry; - } - - if (!ignore_locality && start != 0) { - /* - * If we can't allocate near a specific rt extent, try - * again without locality criteria. - */ - ignore_locality = true; + noalign = true; goto retry; } @@ -1500,11 +1455,6 @@ retry: if (error) return error; - xfs_trans_mod_sb(ap->tp, ap->wasdel ? - XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS, - -(long)ralen); - ap->blkno = xfs_rtx_to_rtb(mp, rtx); - ap->length = xfs_rtxlen_to_extlen(mp, ralen); xfs_bmap_alloc_account(ap); return 0; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 27e9f749c4c7..26767745d9fd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -311,9 +311,9 @@ xfs_set_inode_alloc( * the allocator to accommodate the request. */ if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) - set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); + xfs_set_inode32(mp); else - clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); + xfs_clear_inode32(mp); for (index = 0; index < agcount; index++) { struct xfs_perag *pag; @@ -1511,7 +1511,7 @@ xfs_fs_fill_super( * the newer fsopen/fsconfig API. */ if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_set_readonly(mp); if (fc->sb_flags & SB_DIRSYNC) mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS) @@ -1820,7 +1820,7 @@ xfs_remount_rw( return -EINVAL; } - clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_clear_readonly(mp); /* * If this is the first remount to writeable state we might have some @@ -1908,7 +1908,7 @@ xfs_remount_ro( xfs_save_resvblks(mp); xfs_log_clean(mp); - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_set_readonly(mp); return 0; } @@ -2009,8 +2009,7 @@ static int xfs_init_fs_context( return -ENOMEM; spin_lock_init(&mp->m_sb_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); - spin_lock_init(&mp->m_perag_lock); + xa_init(&mp->m_perags); mutex_init(&mp->m_growlock); INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 77f19e2f66e0..4252b07cd251 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -165,7 +165,7 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (!error) error = xfs_icreate(tp, ino, &args, &du.ip); if (error) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 180ce697305a..ee9f0b1f548d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -210,14 +210,14 @@ DEFINE_EVENT(xfs_perag_class, name, \ TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \ TP_ARGS(pag, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_hold); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_grab); -DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_rele); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); +DEFINE_PERAG_REF_EVENT(xfs_reclaim_inodes_count); TRACE_EVENT(xfs_inodegc_worker, TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits), @@ -4926,7 +4926,8 @@ DEFINE_INODE_ERROR_EVENT(xfs_exchrange_error); { XFS_EXCHANGE_RANGE_DRY_RUN, "DRY_RUN" }, \ { XFS_EXCHANGE_RANGE_FILE1_WRITTEN, "F1_WRITTEN" }, \ { __XFS_EXCHANGE_RANGE_UPD_CMTIME1, "CMTIME1" }, \ - { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" } + { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" }, \ + { __XFS_EXCHANGE_RANGE_CHECK_FRESH2, "FRESH2" } /* file exchange-range tracepoint class */ DECLARE_EVENT_CLASS(xfs_exchrange_class, @@ -4986,6 +4987,60 @@ DEFINE_EXCHRANGE_EVENT(xfs_exchrange_prep); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_flush); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_mappings); +TRACE_EVENT(xfs_exchrange_freshness, + TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip2), + TP_ARGS(fxr, ip2), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ip2_ino) + __field(long long, ip2_mtime) + __field(long long, ip2_ctime) + __field(int, ip2_mtime_nsec) + __field(int, ip2_ctime_nsec) + + __field(xfs_ino_t, file2_ino) + __field(long long, file2_mtime) + __field(long long, file2_ctime) + __field(int, file2_mtime_nsec) + __field(int, file2_ctime_nsec) + ), + TP_fast_assign( + struct timespec64 ts64; + struct inode *inode2 = VFS_I(ip2); + + __entry->dev = inode2->i_sb->s_dev; + __entry->ip2_ino = ip2->i_ino; + + ts64 = inode_get_ctime(inode2); + __entry->ip2_ctime = ts64.tv_sec; + __entry->ip2_ctime_nsec = ts64.tv_nsec; + + ts64 = inode_get_mtime(inode2); + __entry->ip2_mtime = ts64.tv_sec; + __entry->ip2_mtime_nsec = ts64.tv_nsec; + + __entry->file2_ino = fxr->file2_ino; + __entry->file2_mtime = fxr->file2_mtime.tv_sec; + __entry->file2_ctime = fxr->file2_ctime.tv_sec; + __entry->file2_mtime_nsec = fxr->file2_mtime.tv_nsec; + __entry->file2_ctime_nsec = fxr->file2_ctime.tv_nsec; + ), + TP_printk("dev %d:%d " + "ino 0x%llx mtime %lld:%d ctime %lld:%d -> " + "file 0x%llx mtime %lld:%d ctime %lld:%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ip2_ino, + __entry->ip2_mtime, + __entry->ip2_mtime_nsec, + __entry->ip2_ctime, + __entry->ip2_ctime_nsec, + __entry->file2_ino, + __entry->file2_mtime, + __entry->file2_mtime_nsec, + __entry->file2_ctime, + __entry->file2_ctime_nsec) +); + TRACE_EVENT(xfs_exchmaps_overhead, TP_PROTO(struct xfs_mount *mp, unsigned long long bmbt_blocks, unsigned long long rmapbt_blocks), |