diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 08:03:55 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-19 08:03:55 +0300 |
commit | 8751b21ad9dc33f31dff20297dcae2063cbbcfc9 (patch) | |
tree | df0b13c8d1409687f3d9303373f5a1a11c014cf4 /fs/xfs | |
parent | 4e0373f1f920811a67fef0c3383f1ad602b3845e (diff) | |
parent | 90fa22da6d6b41dc17435aff7b800f9ca3c00401 (diff) | |
download | linux-8751b21ad9dc33f31dff20297dcae2063cbbcfc9.tar.xz |
Merge tag 'xfs-6.12-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Chandan Babu:
"New code:
- Introduce new ioctls to exchange contents of two files.
The first ioctl does the preparation work to exchange the contents
of two files while the second ioctl performs the actual exchange if
the target file has not been changed since a given sampling point.
Fixes:
- Fix bugs associated with calculating the maximum range of realtime
extents to scan for free space.
- Copy keys instead of records when resizing the incore BMBT root
block.
- Do not report FITRIMming more bytes than possibly exist in the
filesystem.
- Modify xfs_fs.h to prevent C++ compilation errors.
- Do not over eagerly free post-EOF speculative preallocation.
- Ensure st_blocks never goes to zero during COW writes
Cleanups/refactors:
- Use Xarray to hold per-AG data instead of a Radix tree.
- Cleanups to:
- realtime bitmap
- inode allocator
- quota
- inode rooted btree code"
* tag 'xfs-6.12-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (61 commits)
xfs: ensure st_blocks never goes to zero during COW writes
xfs: use xas_for_each_marked in xfs_reclaim_inodes_count
xfs: convert perag lookup to xarray
xfs: simplify tagged perag iteration
xfs: move the tagged perag lookup helpers to xfs_icache.c
xfs: use kfree_rcu_mightsleep to free the perag structures
xfs: use LIST_HEAD() to simplify code
xfs: Remove duplicate xfs_trans_priv.h header
xfs: remove unnecessary check
xfs: Use xfs set and clear mp state helpers
xfs: reclaim speculative preallocations for append only files
xfs: simplify extent lookup in xfs_can_free_eofblocks
xfs: check XFS_EOFBLOCKS_RELEASED earlier in xfs_release_eofblocks
xfs: only free posteof blocks on first close
xfs: don't free post-EOF blocks on read close
xfs: skip all of xfs_file_release when shut down
xfs: don't bother returning errors from xfs_file_release
xfs: refactor f_op->release handling
xfs: remove the i_mode check in xfs_release
xfs: standardize the btree maxrecs function parameters
...
Diffstat (limited to 'fs/xfs')
60 files changed, 1770 insertions, 1452 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 7e80732cb547..5f0494702e0b 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -46,7 +46,7 @@ xfs_perag_get( struct xfs_perag *pag; rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); + pag = xa_load(&mp->m_perags, agno); if (pag) { trace_xfs_perag_get(pag, _RET_IP_); ASSERT(atomic_read(&pag->pag_ref) >= 0); @@ -56,31 +56,6 @@ xfs_perag_get( return pag; } -/* - * search from @first to find the next perag with the given tag set. - */ -struct xfs_perag * -xfs_perag_get_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - unsigned int tag) -{ - struct xfs_perag *pag; - int found; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - trace_xfs_perag_get_tag(pag, _RET_IP_); - atomic_inc(&pag->pag_ref); - rcu_read_unlock(); - return pag; -} - /* Get a passive reference to the given perag. */ struct xfs_perag * xfs_perag_hold( @@ -117,7 +92,7 @@ xfs_perag_grab( struct xfs_perag *pag; rcu_read_lock(); - pag = radix_tree_lookup(&mp->m_perag_tree, agno); + pag = xa_load(&mp->m_perags, agno); if (pag) { trace_xfs_perag_grab(pag, _RET_IP_); if (!atomic_inc_not_zero(&pag->pag_active_ref)) @@ -127,32 +102,6 @@ xfs_perag_grab( return pag; } -/* - * search from @first to find the next perag with the given tag set. - */ -struct xfs_perag * -xfs_perag_grab_tag( - struct xfs_mount *mp, - xfs_agnumber_t first, - int tag) -{ - struct xfs_perag *pag; - int found; - - rcu_read_lock(); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, first, 1, tag); - if (found <= 0) { - rcu_read_unlock(); - return NULL; - } - trace_xfs_perag_grab_tag(pag, _RET_IP_); - if (!atomic_inc_not_zero(&pag->pag_active_ref)) - pag = NULL; - rcu_read_unlock(); - return pag; -} - void xfs_perag_rele( struct xfs_perag *pag) @@ -235,16 +184,6 @@ out: return error; } -STATIC void -__xfs_free_perag( - struct rcu_head *head) -{ - struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); - - ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); - kfree(pag); -} - /* * Free up the per-ag resources associated with the mount structure. */ @@ -256,9 +195,7 @@ xfs_free_perag( xfs_agnumber_t agno; for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - spin_lock(&mp->m_perag_lock); - pag = radix_tree_delete(&mp->m_perag_tree, agno); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, agno); ASSERT(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); xfs_defer_drain_free(&pag->pag_intents_drain); @@ -270,7 +207,7 @@ xfs_free_perag( xfs_perag_rele(pag); XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_active_ref) != 0); - call_rcu(&pag->rcu_head, __xfs_free_perag); + kfree_rcu_mightsleep(pag); } } @@ -347,9 +284,7 @@ xfs_free_unused_perag_range( xfs_agnumber_t index; for (index = agstart; index < agend; index++) { - spin_lock(&mp->m_perag_lock); - pag = radix_tree_delete(&mp->m_perag_tree, index); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, index); if (!pag) break; xfs_buf_cache_destroy(&pag->pag_bcache); @@ -390,20 +325,11 @@ xfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; - error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL); - if (error) - goto out_free_pag; - - spin_lock(&mp->m_perag_lock); - if (radix_tree_insert(&mp->m_perag_tree, index, pag)) { - WARN_ON_ONCE(1); - spin_unlock(&mp->m_perag_lock); - radix_tree_preload_end(); - error = -EEXIST; + error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL); + if (error) { + WARN_ON_ONCE(error == -EBUSY); goto out_free_pag; } - spin_unlock(&mp->m_perag_lock); - radix_tree_preload_end(); #ifdef __KERNEL__ /* Place kernel structure only init below this point. */ @@ -451,9 +377,7 @@ xfs_initialize_perag( out_remove_pag: xfs_defer_drain_free(&pag->pag_intents_drain); - spin_lock(&mp->m_perag_lock); - radix_tree_delete(&mp->m_perag_tree, index); - spin_unlock(&mp->m_perag_lock); + pag = xa_erase(&mp->m_perags, index); out_free_pag: kfree(pag); out_unwind_new_pags: diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 35de09a2516c..d9cccd093b60 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -63,9 +63,6 @@ struct xfs_perag { /* Blocks reserved for the reverse mapping btree. */ struct xfs_ag_resv pag_rmapbt_resv; - /* for rcu-safe freeing */ - struct rcu_head rcu_head; - /* Precalculated geometry info */ xfs_agblock_t block_count; xfs_agblock_t min_block; @@ -156,15 +153,11 @@ void xfs_free_perag(struct xfs_mount *mp); /* Passive AG references */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); -struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, - unsigned int tag); struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag); void xfs_perag_put(struct xfs_perag *pag); /* Active AG references */ struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); -struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t, - int tag); void xfs_perag_rele(struct xfs_perag *pag); /* @@ -266,13 +259,6 @@ xfs_perag_next( (agno) = 0; \ for_each_perag_from((mp), (agno), (pag)) -#define for_each_perag_tag(mp, agno, pag, tag) \ - for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \ - (pag) != NULL; \ - (agno) = (pag)->pag_agno + 1, \ - xfs_perag_rele(pag), \ - (pag) = xfs_perag_grab_tag((mp), (agno), (tag))) - static inline struct xfs_perag * xfs_perag_next_wrap( struct xfs_perag *pag, diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 585e98e87ef9..aada676eee51 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -569,11 +569,11 @@ xfs_allocbt_block_maxrecs( /* * Calculate number of records in an alloc btree block. */ -int +unsigned int xfs_allocbt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { blocklen -= XFS_ALLOC_BLOCK_LEN(mp); return xfs_allocbt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 155b47f231ab..12647f9aaa6d 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -53,7 +53,8 @@ struct xfs_btree_cur *xfs_bnobt_init_cursor(struct xfs_mount *mp, struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_perag *pag); -extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); +unsigned int xfs_allocbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index b9e98950eb3d..6aaec1246c95 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -686,7 +686,7 @@ xfs_attr_shortform_bytesfit( */ if (!dp->i_forkoff && dp->i_df.if_bytes > xfs_default_attroffset(dp)) - dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + dsize = xfs_bmdr_space_calc(MINDBTPTRS); break; case XFS_DINODE_FMT_BTREE: /* @@ -700,7 +700,7 @@ xfs_attr_shortform_bytesfit( return 0; return dp->i_forkoff; } - dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot); + dsize = xfs_bmap_bmdr_space(dp->i_df.if_broot); break; } @@ -708,11 +708,11 @@ xfs_attr_shortform_bytesfit( * A data fork btree root must have space for at least * MINDBTPTRS key/ptr pairs if the data fork is small or empty. */ - minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); + minforkoff = max_t(int64_t, dsize, xfs_bmdr_space_calc(MINDBTPTRS)); minforkoff = roundup(minforkoff, 8) >> 3; /* attr fork btree root can have at least this many key/ptr pairs */ - maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); + maxforkoff = XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS); maxforkoff = maxforkoff >> 3; /* rounded down */ if (offset >= maxforkoff) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7df74c35d9f9..8090e8249116 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -79,9 +79,9 @@ xfs_bmap_compute_maxlevels( maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp), whichfork); if (whichfork == XFS_DATA_FORK) - sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); + sz = xfs_bmdr_space_calc(MINDBTPTRS); else - sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); + sz = xfs_bmdr_space_calc(MINABTPTRS); maxrootrecs = xfs_bmdr_maxrecs(sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; @@ -102,8 +102,8 @@ xfs_bmap_compute_attr_offset( struct xfs_mount *mp) { if (mp->m_sb.sb_inodesize == 256) - return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); - return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); + return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS); + return xfs_bmdr_space_calc(6 * MINABTPTRS); } STATIC int /* error */ @@ -298,7 +298,7 @@ xfs_check_block( prevp = NULL; for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { dmxr = mp->m_bmap_dmxr[0]; - keyp = XFS_BMBT_KEY_ADDR(mp, block, i); + keyp = xfs_bmbt_key_addr(mp, block, i); if (prevp) { ASSERT(be64_to_cpu(prevp->br_startoff) < @@ -310,15 +310,15 @@ xfs_check_block( * Compare the block numbers to see if there are dups. */ if (root) - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); + pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz); else - pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr); for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { if (root) - thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); + thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz); else - thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); + thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr); if (*thispa == *pp) { xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld", __func__, j, i, @@ -373,7 +373,7 @@ xfs_bmap_check_leaf_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLFSBLOCK); @@ -406,7 +406,7 @@ xfs_bmap_check_leaf_extents( */ xfs_check_block(block, mp, 0, 0); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); + pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) { xfs_btree_mark_sick(cur); @@ -446,14 +446,14 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in this one. */ - ep = XFS_BMBT_REC_ADDR(mp, block, 1); + ep = xfs_bmbt_rec_addr(mp, block, 1); if (i) { ASSERT(xfs_bmbt_disk_get_startoff(&last) + xfs_bmbt_disk_get_blockcount(&last) <= xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { - nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); + nextp = xfs_bmbt_rec_addr(mp, block, j + 1); ASSERT(xfs_bmbt_disk_get_startoff(ep) + xfs_bmbt_disk_get_blockcount(ep) <= xfs_bmbt_disk_get_startoff(nextp)); @@ -584,9 +584,9 @@ xfs_bmap_btree_to_extents( ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1); - pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); + pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); #ifdef DEBUG if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) { @@ -714,7 +714,7 @@ xfs_bmap_extents_to_btree( for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) continue; - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt); + arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt); xfs_bmbt_disk_set_all(arp, &rec); cnt++; } @@ -724,10 +724,10 @@ xfs_bmap_extents_to_btree( /* * Fill in the root key and pointer. */ - kp = XFS_BMBT_KEY_ADDR(mp, block, 1); - arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); + kp = xfs_bmbt_key_addr(mp, block, 1); + arp = xfs_bmbt_rec_addr(mp, ablock, 1); kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur, be16_to_cpu(block->bb_level))); *pp = cpu_to_be64(args.fsbno); @@ -896,7 +896,7 @@ xfs_bmap_add_attrfork_btree( mp = ip->i_mount; - if (XFS_BMAP_BMDR_SPACE(block) <= xfs_inode_data_fork_size(ip)) + if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip)) *flags |= XFS_ILOG_DBROOT; else { cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); @@ -1160,7 +1160,7 @@ xfs_iread_bmbt_block( } /* Copy records into the incore cache. */ - frp = XFS_BMBT_REC_ADDR(mp, block, 1); + frp = xfs_bmbt_rec_addr(mp, block, 1); for (j = 0; j < num_recs; j++, frp++, ir->loaded++) { struct xfs_bmbt_irec new; xfs_failaddr_t fa; @@ -3112,6 +3112,23 @@ xfs_bmap_extsize_align( return 0; } +static inline bool +xfs_bmap_adjacent_valid( + struct xfs_bmalloca *ap, + xfs_fsblock_t x, + xfs_fsblock_t y) +{ + struct xfs_mount *mp = ap->ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ap->ip) && + (ap->datatype & XFS_ALLOC_USERDATA)) + return x < mp->m_sb.sb_rblocks; + + return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks; +} + #define XFS_ALLOC_GAP_UNITS 4 /* returns true if ap->blkno was modified */ @@ -3119,36 +3136,25 @@ bool xfs_bmap_adjacent( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_mount_t *mp; /* mount point structure */ - int rt; /* true if inode is realtime */ - -#define ISVALID(x,y) \ - (rt ? \ - (x) < mp->m_sb.sb_rblocks : \ - XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ - XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ - XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) - - mp = ap->ip->i_mount; - rt = XFS_IS_REALTIME_INODE(ap->ip) && - (ap->datatype & XFS_ALLOC_USERDATA); + xfs_fsblock_t adjust; /* adjustment to block numbers */ + /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. */ if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && !isnullstartblock(ap->prev.br_startblock) && - ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, - ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, + ap->prev.br_startblock + ap->prev.br_blockcount, + ap->prev.br_startblock)) { ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; /* * Adjust for the gap between prevp and us. */ adjust = ap->offset - (ap->prev.br_startoff + ap->prev.br_blockcount); - if (adjust && - ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) + if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust, + ap->prev.br_startblock)) ap->blkno += adjust; return true; } @@ -3171,7 +3177,8 @@ xfs_bmap_adjacent( !isnullstartblock(ap->prev.br_startblock) && (prevbno = ap->prev.br_startblock + ap->prev.br_blockcount) && - ISVALID(prevbno, ap->prev.br_startblock)) { + xfs_bmap_adjacent_valid(ap, prevbno, + ap->prev.br_startblock)) { /* * Calculate gap to end of previous block. */ @@ -3187,8 +3194,8 @@ xfs_bmap_adjacent( * number, then just use the end of the previous block. */ if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(prevbno + prevdiff, - ap->prev.br_startblock)) + xfs_bmap_adjacent_valid(ap, prevbno + prevdiff, + ap->prev.br_startblock)) prevbno += adjust; else prevdiff += adjust; @@ -3220,9 +3227,11 @@ xfs_bmap_adjacent( * offset by our length. */ if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && - ISVALID(gotbno - gotdiff, gotbno)) + xfs_bmap_adjacent_valid(ap, gotbno - gotdiff, + gotbno)) gotbno -= adjust; - else if (ISVALID(gotbno - ap->length, gotbno)) { + else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length, + gotbno)) { gotbno -= ap->length; gotdiff += adjust - ap->length; } else @@ -3250,7 +3259,7 @@ xfs_bmap_adjacent( return true; } } -#undef ISVALID + return false; } @@ -4847,6 +4856,7 @@ xfs_bmapi_remap( } ip->i_nblocks += len; + ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (ifp->if_format == XFS_DINODE_FMT_BTREE) @@ -5376,7 +5386,8 @@ xfs_bmap_del_extent_real( */ if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; - xfs_rtbitmap_lock(tp, mp); + xfs_rtbitmap_lock(mp); + xfs_rtbitmap_trans_join(tp); } error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d1b06ccde19e..3464be771f95 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -65,10 +65,10 @@ xfs_bmdr_to_bmbt( ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMDR_KEY_ADDR(dblock, 1); - tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); - tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); + fkp = xfs_bmdr_key_addr(dblock, 1); + tkp = xfs_bmbt_key_addr(mp, rblock, 1); + fpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); + tpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -168,10 +168,10 @@ xfs_bmbt_to_bmdr( dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(dblocklen, 0); - fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); - tkp = XFS_BMDR_KEY_ADDR(dblock, 1); - fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); + fkp = xfs_bmbt_key_addr(mp, rblock, 1); + tkp = xfs_bmdr_key_addr(dblock, 1); + fpp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, rblocklen); + tpp = xfs_bmdr_ptr_addr(dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -645,13 +645,13 @@ xfs_bmbt_commit_staged_btree( /* * Calculate number of records in a bmap btree block. */ -int +unsigned int xfs_bmbt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { - blocklen -= XFS_BMBT_BLOCK_LEN(mp); + blocklen -= xfs_bmbt_block_len(mp); return xfs_bmbt_block_maxrecs(blocklen, leaf); } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index de1b73f1225c..49a3bae3f6ec 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -14,70 +14,6 @@ struct xfs_trans; struct xbtree_ifakeroot; /* - * Btree block header size depends on a superblock flag. - */ -#define XFS_BMBT_BLOCK_LEN(mp) \ - (xfs_has_crc(((mp))) ? \ - XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN) - -#define XFS_BMBT_REC_ADDR(mp, block, index) \ - ((xfs_bmbt_rec_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - ((index) - 1) * sizeof(xfs_bmbt_rec_t))) - -#define XFS_BMBT_KEY_ADDR(mp, block, index) \ - ((xfs_bmbt_key_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - ((index) - 1) * sizeof(xfs_bmbt_key_t))) - -#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ - ((xfs_bmbt_ptr_t *) \ - ((char *)(block) + \ - XFS_BMBT_BLOCK_LEN(mp) + \ - (maxrecs) * sizeof(xfs_bmbt_key_t) + \ - ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) - -#define XFS_BMDR_REC_ADDR(block, index) \ - ((xfs_bmdr_rec_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - ((index) - 1) * sizeof(xfs_bmdr_rec_t))) - -#define XFS_BMDR_KEY_ADDR(block, index) \ - ((xfs_bmdr_key_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - ((index) - 1) * sizeof(xfs_bmdr_key_t))) - -#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ - ((xfs_bmdr_ptr_t *) \ - ((char *)(block) + \ - sizeof(struct xfs_bmdr_block) + \ - (maxrecs) * sizeof(xfs_bmdr_key_t) + \ - ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) - -/* - * These are to be used when we know the size of the block and - * we don't have a cursor. - */ -#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ - XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) - -#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \ - (int)(XFS_BMBT_BLOCK_LEN(mp) + \ - ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) - -#define XFS_BMAP_BROOT_SPACE(mp, bb) \ - (XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs))) -#define XFS_BMDR_SPACE_CALC(nrecs) \ - (int)(sizeof(xfs_bmdr_block_t) + \ - ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) -#define XFS_BMAP_BMDR_SPACE(bb) \ - (XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs))) - -/* * Maximum number of bmap btree levels. */ #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) @@ -99,7 +35,8 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); extern int xfs_bmdr_maxrecs(int blocklen, int leaf); -extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); +unsigned int xfs_bmbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, xfs_ino_t new_owner, @@ -121,4 +58,144 @@ void xfs_bmbt_destroy_cur_cache(void); void xfs_bmbt_init_block(struct xfs_inode *ip, struct xfs_btree_block *buf, struct xfs_buf *bp, __u16 level, __u16 numrecs); +/* + * Btree block header size depends on a superblock flag. + */ +static inline size_t +xfs_bmbt_block_len(struct xfs_mount *mp) +{ + return xfs_has_crc(mp) ? + XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN; +} + +/* Addresses of key, pointers, and records within an incore bmbt block. */ + +static inline struct xfs_bmbt_rec * +xfs_bmbt_rec_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_rec *) + ((char *)block + xfs_bmbt_block_len(mp) + + (index - 1) * sizeof(struct xfs_bmbt_rec)); +} + +static inline struct xfs_bmbt_key * +xfs_bmbt_key_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_key *) + ((char *)block + xfs_bmbt_block_len(mp) + + (index - 1) * sizeof(struct xfs_bmbt_key *)); +} + +static inline xfs_bmbt_ptr_t * +xfs_bmbt_ptr_addr( + struct xfs_mount *mp, + struct xfs_btree_block *block, + unsigned int index, + unsigned int maxrecs) +{ + return (xfs_bmbt_ptr_t *) + ((char *)block + xfs_bmbt_block_len(mp) + + maxrecs * sizeof(struct xfs_bmbt_key) + + (index - 1) * sizeof(xfs_bmbt_ptr_t)); +} + +/* Addresses of key, pointers, and records within an ondisk bmbt block. */ + +static inline struct xfs_bmbt_rec * +xfs_bmdr_rec_addr( + struct xfs_bmdr_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_rec *) + ((char *)(block + 1) + + (index - 1) * sizeof(struct xfs_bmbt_rec)); +} + +static inline struct xfs_bmbt_key * +xfs_bmdr_key_addr( + struct xfs_bmdr_block *block, + unsigned int index) +{ + return (struct xfs_bmbt_key *) + ((char *)(block + 1) + + (index - 1) * sizeof(struct xfs_bmbt_key)); +} + +static inline xfs_bmbt_ptr_t * +xfs_bmdr_ptr_addr( + struct xfs_bmdr_block *block, + unsigned int index, + unsigned int maxrecs) +{ + return (xfs_bmbt_ptr_t *) + ((char *)(block + 1) + + maxrecs * sizeof(struct xfs_bmbt_key) + + (index - 1) * sizeof(xfs_bmbt_ptr_t)); +} + +/* + * Address of pointers within the incore btree root. + * + * These are to be used when we know the size of the block and + * we don't have a cursor. + */ +static inline xfs_bmbt_ptr_t * +xfs_bmap_broot_ptr_addr( + struct xfs_mount *mp, + struct xfs_btree_block *bb, + unsigned int i, + unsigned int sz) +{ + return xfs_bmbt_ptr_addr(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, false)); +} + +/* + * Compute the space required for the incore btree root containing the given + * number of records. + */ +static inline size_t +xfs_bmap_broot_space_calc( + struct xfs_mount *mp, + unsigned int nrecs) +{ + return xfs_bmbt_block_len(mp) + + (nrecs * (sizeof(struct xfs_bmbt_key) + sizeof(xfs_bmbt_ptr_t))); +} + +/* + * Compute the space required for the incore btree root given the ondisk + * btree root block. + */ +static inline size_t +xfs_bmap_broot_space( + struct xfs_mount *mp, + struct xfs_bmdr_block *bb) +{ + return xfs_bmap_broot_space_calc(mp, be16_to_cpu(bb->bb_numrecs)); +} + +/* Compute the space required for the ondisk root block. */ +static inline size_t +xfs_bmdr_space_calc(unsigned int nrecs) +{ + return sizeof(struct xfs_bmdr_block) + + (nrecs * (sizeof(struct xfs_bmbt_key) + sizeof(xfs_bmbt_ptr_t))); +} + +/* + * Compute the space required for the ondisk root block given an incore root + * block. + */ +static inline size_t +xfs_bmap_bmdr_space(struct xfs_btree_block *bb) +{ + return xfs_bmdr_space_calc(be16_to_cpu(bb->bb_numrecs)); +} + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 40021849b42f..2cd212ad2c1d 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -28,7 +28,6 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr.h" -#include "xfs_trans_priv.h" #include "xfs_exchmaps.h" static struct kmem_cache *xfs_defer_pending_cache; diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 454b63ef7201..860284064c5a 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -8,6 +8,7 @@ /* * SGI's XFS filesystem's major stuff (constants, structures) + * NOTE: This file must be compile-able with C++ compilers. */ /* @@ -826,6 +827,30 @@ struct xfs_exchange_range { }; /* + * Using the same definition of file2 as struct xfs_exchange_range, commit the + * contents of file1 into file2 if file2 has the same inode number, mtime, and + * ctime as the arguments provided to the call. The old contents of file2 will + * be moved to file1. + * + * Returns -EBUSY if there isn't an exact match for the file2 fields. + * + * Filesystems must be able to restart and complete the operation even after + * the system goes down. + */ +struct xfs_commit_range { + __s32 file1_fd; + __u32 pad; /* must be zeroes */ + __u64 file1_offset; /* file1 offset, bytes */ + __u64 file2_offset; /* file2 offset, bytes */ + __u64 length; /* bytes to exchange */ + + __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */ + + /* opaque file2 metadata for freshness checks */ + __u64 file2_freshness[6]; +}; + +/* * Exchange file data all the way to the ends of both files, and then exchange * the file sizes. This flag can be used to replace a file's contents with a * different amount of data. length will be ignored. @@ -906,13 +931,13 @@ static inline struct xfs_getparents_rec * xfs_getparents_next_rec(struct xfs_getparents *gp, struct xfs_getparents_rec *gpr) { - void *next = ((void *)gpr + gpr->gpr_reclen); + void *next = ((char *)gpr + gpr->gpr_reclen); void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize); if (next >= end) return NULL; - return next; + return (struct xfs_getparents_rec *)next; } /* Iterate through this file handle's directory parent pointers. */ @@ -997,6 +1022,8 @@ struct xfs_getparents_by_handle { #define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) #define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) #define XFS_IOC_EXCHANGE_RANGE _IOW ('X', 129, struct xfs_exchange_range) +#define XFS_IOC_START_COMMIT _IOR ('X', 130, struct xfs_commit_range) +#define XFS_IOC_COMMIT_RANGE _IOW ('X', 131, struct xfs_commit_range) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0af5b7a33d05..20bb5ce38134 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1855,11 +1855,12 @@ out_release: int xfs_dialloc( struct xfs_trans **tpp, - xfs_ino_t parent, - umode_t mode, + const struct xfs_icreate_args *args, xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; + xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; + umode_t mode = args->mode & S_IFMT; xfs_agnumber_t agno; int error = 0; xfs_agnumber_t start_agno; @@ -2947,8 +2948,8 @@ xfs_ialloc_setup_geometry( /* Compute inode btree geometry. */ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, true); + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, false); igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index b549627e3a61..3a1323155a45 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -33,11 +33,13 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog); } +struct xfs_icreate_args; + /* * Allocate an inode on disk. Mode is used to tell whether the new inode will * need space, and whether it is a directory. */ -int xfs_dialloc(struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, +int xfs_dialloc(struct xfs_trans **tpp, const struct xfs_icreate_args *args, xfs_ino_t *new_ino); int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 797d5b5f7b72..401b42d52af6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -572,11 +572,11 @@ xfs_inobt_block_maxrecs( /* * Calculate number of records in an inobt btree block. */ -int +unsigned int xfs_inobt_maxrecs( struct xfs_mount *mp, - int blocklen, - int leaf) + unsigned int blocklen, + bool leaf) { blocklen -= XFS_INOBT_BLOCK_LEN(mp); return xfs_inobt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 6472ec1ecbb4..300edf5bc009 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -50,7 +50,8 @@ struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp); struct xfs_btree_cur *xfs_finobt_init_cursor(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp); -extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); +unsigned int xfs_inobt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); /* ir_holemask to inode allocation bitmap conversion */ uint64_t xfs_inobt_irec_to_allocmask(const struct xfs_inobt_rec_incore *irec); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9d11ae015909..1158ca48626b 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -185,7 +185,7 @@ xfs_iformat_btree( ifp = xfs_ifork_ptr(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); - size = XFS_BMAP_BROOT_SPACE(mp, dfp); + size = xfs_bmap_broot_space(mp, dfp); nrecs = be16_to_cpu(dfp->bb_numrecs); level = be16_to_cpu(dfp->bb_level); @@ -198,7 +198,7 @@ xfs_iformat_btree( */ if (unlikely(ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork) || nrecs == 0 || - XFS_BMDR_SPACE_CALC(nrecs) > + xfs_bmdr_space_calc(nrecs) > XFS_DFORK_SIZE(dip, mp, whichfork) || ifp->if_nextents > ip->i_nblocks) || level == 0 || level > XFS_BM_MAXLEVELS(mp, whichfork)) { @@ -409,7 +409,7 @@ xfs_iroot_realloc( * allocate it now and get out. */ if (ifp->if_broot_bytes == 0) { - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); + new_size = xfs_bmap_broot_space_calc(mp, rec_diff); ifp->if_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = (int)new_size; @@ -422,17 +422,17 @@ xfs_iroot_realloc( * location. The records don't change location because * they are kept butted up against the btree block header. */ - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false); new_max = cur_max + rec_diff; - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + new_size = xfs_bmap_broot_space_calc(mp, new_max); ifp->if_broot = krealloc(ifp->if_broot, new_size, GFP_KERNEL | __GFP_NOFAIL); - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + np = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, (int)new_size); ifp->if_broot_bytes = (int)new_size; - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t)); return; @@ -444,11 +444,11 @@ xfs_iroot_realloc( * records, just get rid of the root and clear the status bit. */ ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); - cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false); new_max = cur_max + rec_diff; ASSERT(new_max >= 0); if (new_max > 0) - new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); + new_size = xfs_bmap_broot_space_calc(mp, new_max); else new_size = 0; if (new_size > 0) { @@ -457,28 +457,28 @@ xfs_iroot_realloc( * First copy over the btree block header. */ memcpy(new_broot, ifp->if_broot, - XFS_BMBT_BLOCK_LEN(ip->i_mount)); + xfs_bmbt_block_len(ip->i_mount)); } else { new_broot = NULL; } /* - * Only copy the records and pointers if there are any. + * Only copy the keys and pointers if there are any. */ if (new_max > 0) { /* - * First copy the records. + * First copy the keys. */ - op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); - np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); - memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); + op = (char *)xfs_bmbt_key_addr(mp, ifp->if_broot, 1); + np = (char *)xfs_bmbt_key_addr(mp, new_broot, 1); + memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_key_t)); /* * Then copy the pointers. */ - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + op = (char *)xfs_bmap_broot_ptr_addr(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, + np = (char *)xfs_bmap_broot_ptr_addr(mp, new_broot, 1, (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t)); } @@ -486,7 +486,7 @@ xfs_iroot_realloc( ifp->if_broot = new_broot; ifp->if_broot_bytes = (int)new_size; if (ifp->if_broot) - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); return; } @@ -655,7 +655,7 @@ xfs_iflush_fork( if ((iip->ili_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); - ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + ASSERT(xfs_bmap_bmdr_space(ifp->if_broot) <= xfs_inode_fork_size(ip, whichfork)); xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 032333289113..cc38e1c3c3e1 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -308,7 +308,7 @@ xfs_inode_init( !vfsgid_in_group_p(i_gid_into_vfsgid(args->idmap, inode))) inode->i_mode &= ~S_ISGID; - ip->i_projid = pip ? xfs_get_initial_prid(pip) : 0; + ip->i_projid = xfs_get_initial_prid(pip); } ip->i_disk_size = 0; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index cb3b1d42ae9a..795928d1a66d 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -417,9 +417,10 @@ xfs_refcountbt_block_maxrecs( /* * Calculate the number of records in a refcount btree block. */ -int +unsigned int xfs_refcountbt_maxrecs( - int blocklen, + struct xfs_mount *mp, + unsigned int blocklen, bool leaf) { blocklen -= XFS_REFCOUNT_BLOCK_LEN; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index 1e0ab25f6c68..beb93bef6a81 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -48,7 +48,8 @@ struct xbtree_afakeroot; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag); -extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); +unsigned int xfs_refcountbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 56fd6c4bd8b4..ac2f1f499b76 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -731,10 +731,11 @@ xfs_rmapbt_block_maxrecs( /* * Calculate number of records in an rmap btree block. */ -int +unsigned int xfs_rmapbt_maxrecs( - int blocklen, - int leaf) + struct xfs_mount *mp, + unsigned int blocklen, + bool leaf) { blocklen -= XFS_RMAP_BLOCK_LEN; return xfs_rmapbt_block_maxrecs(blocklen, leaf); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index eb90d89e8086..119b1567cd0e 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -47,7 +47,8 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_perag *pag); void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); -int xfs_rmapbt_maxrecs(int blocklen, int leaf); +unsigned int xfs_rmapbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp); extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 386b672c5058..27a4472402ba 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -13,6 +13,8 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_error.h" @@ -69,7 +71,7 @@ xfs_rtbuf_cache_relse( * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ -int +static int xfs_rtbuf_get( struct xfs_rtalloc_args *args, xfs_fileoff_t block, /* block number in bitmap or summary */ @@ -138,15 +140,43 @@ xfs_rtbuf_get( return 0; } +int +xfs_rtbitmap_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + struct xfs_mount *mp = args->mp; + + if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) { + xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP); + return -EFSCORRUPTED; + } + + return xfs_rtbuf_get(args, block, 0); +} + +int +xfs_rtsummary_read_buf( + struct xfs_rtalloc_args *args, + xfs_fileoff_t block) +{ + struct xfs_mount *mp = args->mp; + + if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) { + xfs_rt_mark_sick(args->mp, XFS_SICK_RT_SUMMARY); + return -EFSCORRUPTED; + } + return xfs_rtbuf_get(args, block, 1); +} + /* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. + * Searching backward from start find the first block whose allocated/free state + * is different from start's. */ int xfs_rtfind_back( struct xfs_rtalloc_args *args, xfs_rtxnum_t start, /* starting rtext to look at */ - xfs_rtxnum_t limit, /* last rtext to look at */ xfs_rtxnum_t *rtx) /* out: start rtext found */ { struct xfs_mount *mp = args->mp; @@ -175,7 +205,7 @@ xfs_rtfind_back( */ word = xfs_rtx_to_rbmword(mp, start); bit = (int)(start & (XFS_NBWORD - 1)); - len = start - limit + 1; + len = start + 1; /* * Compute match value, based on the bit at start: if 1 (free) * then all-ones, else all-zeroes. @@ -316,6 +346,8 @@ xfs_rtfind_forw( xfs_rtword_t incore; unsigned int word; /* word number in the buffer */ + ASSERT(start <= limit); + /* * Compute and read in starting bitmap block for starting block. */ @@ -698,7 +730,7 @@ xfs_rtfree_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(args, start, 0, &preblock); + error = xfs_rtfind_back(args, start, &preblock); if (error) { return error; } @@ -990,25 +1022,24 @@ xfs_rtfree_blocks( xfs_filblks_t rtlen) { struct xfs_mount *mp = tp->t_mountp; - xfs_rtxnum_t start; - xfs_filblks_t len; xfs_extlen_t mod; ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); - len = xfs_rtb_to_rtxrem(mp, rtlen, &mod); + mod = xfs_rtb_to_rtxoff(mp, rtlen); if (mod) { ASSERT(mod == 0); return -EIO; } - start = xfs_rtb_to_rtxrem(mp, rtbno, &mod); + mod = xfs_rtb_to_rtxoff(mp, rtbno); if (mod) { ASSERT(mod == 0); return -EIO; } - return xfs_rtfree_extent(tp, start, len); + return xfs_rtfree_extent(tp, xfs_rtb_to_rtx(mp, rtbno), + xfs_rtb_to_rtx(mp, rtlen)); } /* Find all the free records within a given range. */ @@ -1016,8 +1047,8 @@ int xfs_rtalloc_query_range( struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, + xfs_rtxnum_t start, + xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv) { @@ -1025,45 +1056,42 @@ xfs_rtalloc_query_range( .mp = mp, .tp = tp, }; - struct xfs_rtalloc_rec rec; - xfs_rtxnum_t rtstart; - xfs_rtxnum_t rtend; - xfs_rtxnum_t high_key; - int is_free; int error = 0; - if (low_rec->ar_startext > high_rec->ar_startext) + if (start > end) return -EINVAL; - if (low_rec->ar_startext >= mp->m_sb.sb_rextents || - low_rec->ar_startext == high_rec->ar_startext) + if (start == end || start >= mp->m_sb.sb_rextents) return 0; - high_key = min(high_rec->ar_startext, mp->m_sb.sb_rextents - 1); + end = min(end, mp->m_sb.sb_rextents - 1); /* Iterate the bitmap, looking for discrepancies. */ - rtstart = low_rec->ar_startext; - while (rtstart <= high_key) { + while (start <= end) { + struct xfs_rtalloc_rec rec; + int is_free; + xfs_rtxnum_t rtend; + /* Is the first block free? */ - error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend, + error = xfs_rtcheck_range(&args, start, 1, 1, &rtend, &is_free); if (error) break; /* How long does the extent go for? */ - error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend); + error = xfs_rtfind_forw(&args, start, end, &rtend); if (error) break; if (is_free) { - rec.ar_startext = rtstart; - rec.ar_extcount = rtend - rtstart + 1; + rec.ar_startext = start; + rec.ar_extcount = rtend - start + 1; error = fn(mp, tp, &rec, priv); if (error) break; } - rtstart = rtend + 1; + start = rtend + 1; } xfs_rtbuf_cache_relse(&args); @@ -1078,13 +1106,8 @@ xfs_rtalloc_query_all( xfs_rtalloc_query_range_fn fn, void *priv) { - struct xfs_rtalloc_rec keys[2]; - - keys[0].ar_startext = 0; - keys[1].ar_startext = mp->m_sb.sb_rextents - 1; - keys[0].ar_extcount = keys[1].ar_extcount = 0; - - return xfs_rtalloc_query_range(mp, tp, &keys[0], &keys[1], fn, priv); + return xfs_rtalloc_query_range(mp, tp, 0, mp->m_sb.sb_rextents - 1, fn, + priv); } /* Is the given extent all free? */ @@ -1125,21 +1148,6 @@ xfs_rtbitmap_blockcount( return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); } -/* - * Compute the number of rtbitmap words needed to populate every block of a - * bitmap that is large enough to track the given number of rt extents. - */ -unsigned long long -xfs_rtbitmap_wordcount( - struct xfs_mount *mp, - xfs_rtbxlen_t rtextents) -{ - xfs_filblks_t blocks; - - blocks = xfs_rtbitmap_blockcount(mp, rtextents); - return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; -} - /* Compute the number of rtsummary blocks needed to track the given rt space. */ xfs_filblks_t xfs_rtsummary_blockcount( @@ -1153,39 +1161,25 @@ xfs_rtsummary_blockcount( return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG); } -/* - * Compute the number of rtsummary info words needed to populate every block of - * a summary file that is large enough to track the given rt space. - */ -unsigned long long -xfs_rtsummary_wordcount( - struct xfs_mount *mp, - unsigned int rsumlevels, - xfs_extlen_t rbmblocks) +/* Lock both realtime free space metadata inodes for a freespace update. */ +void +xfs_rtbitmap_lock( + struct xfs_mount *mp) { - xfs_filblks_t blocks; - - blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks); - return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG; + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); } /* - * Lock both realtime free space metadata inodes for a freespace update. If a - * transaction is given, the inodes will be joined to the transaction and the + * Join both realtime free space metadata inodes to the transaction. The * ILOCKs will be released on transaction commit. */ void -xfs_rtbitmap_lock( - struct xfs_trans *tp, - struct xfs_mount *mp) +xfs_rtbitmap_trans_join( + struct xfs_trans *tp) { - xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); - if (tp) - xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); - - xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); - if (tp) - xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tp->t_mountp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, tp->t_mountp->m_rsumip, XFS_ILOCK_EXCL); } /* Unlock both realtime free space metadata inodes after a freespace update. */ @@ -1225,3 +1219,127 @@ xfs_rtbitmap_unlock_shared( if (rbmlock_flags & XFS_RBMLOCK_BITMAP) xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); } + +static int +xfs_rtfile_alloc_blocks( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + struct xfs_bmbt_irec *map) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int nmap = 1; + int error; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, + XFS_GROWFSRT_SPACE_RES(mp, count_fsb), 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, + XFS_IEXT_ADD_NOSPLIT_CNT); + if (error) + goto out_trans_cancel; + + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_METADATA, 0, map, &nmap); + if (error) + goto out_trans_cancel; + + return xfs_trans_commit(tp); + +out_trans_cancel: + xfs_trans_cancel(tp); + return error; +} + +/* Get a buffer for the block. */ +static int +xfs_rtfile_initialize_block( + struct xfs_inode *ip, + xfs_fsblock_t fsbno, + void *data) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + struct xfs_buf *bp; + const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; + enum xfs_blft buf_type; + int error; + + if (ip == mp->m_rsumip) + buf_type = XFS_BLFT_RTSUMMARY_BUF; + else + buf_type = XFS_BLFT_RTBITMAP_BUF; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, 0, 0, 0, &tp); + if (error) + return error; + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + + xfs_trans_buf_set_type(tp, bp, buf_type); + bp->b_ops = &xfs_rtbuf_ops; + if (data) + memcpy(bp->b_addr, data, copylen); + else + memset(bp->b_addr, 0, copylen); + xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + return xfs_trans_commit(tp); +} + +/* + * Allocate space to the bitmap or summary file, and zero it, for growfs. + * @data must be a contiguous buffer large enough to fill all blocks in the + * file; or NULL to initialize the contents to zeroes. + */ +int +xfs_rtfile_initialize_blocks( + struct xfs_inode *ip, /* inode (bitmap/summary) */ + xfs_fileoff_t offset_fsb, /* offset to start from */ + xfs_fileoff_t end_fsb, /* offset to allocate to */ + void *data) /* data to fill the blocks */ +{ + struct xfs_mount *mp = ip->i_mount; + const size_t copylen = mp->m_blockwsize << XFS_WORDLOG; + + while (offset_fsb < end_fsb) { + struct xfs_bmbt_irec map; + xfs_filblks_t i; + int error; + + error = xfs_rtfile_alloc_blocks(ip, offset_fsb, + end_fsb - offset_fsb, &map); + if (error) + return error; + + /* + * Now we need to clear the allocated blocks. + * + * Do this one block per transaction, to keep it simple. + */ + for (i = 0; i < map.br_blockcount; i++) { + error = xfs_rtfile_initialize_block(ip, + map.br_startblock + i, data); + if (error) + return error; + if (data) + data += copylen; + } + + offset_fsb = map.br_startoff + map.br_blockcount; + } + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 6186585f2c37..140513d1d6bc 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -87,24 +87,6 @@ xfs_rtb_to_rtxoff( } /* - * Crack an rt block number into an rt extent number and an offset within that - * rt extent. Returns the rt extent number directly and the offset in @off. - */ -static inline xfs_rtxnum_t -xfs_rtb_to_rtxrem( - struct xfs_mount *mp, - xfs_rtblock_t rtbno, - xfs_extlen_t *off) -{ - if (likely(mp->m_rtxblklog >= 0)) { - *off = rtbno & mp->m_rtxblkmask; - return rtbno >> mp->m_rtxblklog; - } - - return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off); -} - -/* * Convert an rt block number into an rt extent number, rounding up to the next * rt extent if the rt block is not aligned to an rt extent boundary. */ @@ -293,30 +275,12 @@ typedef int (*xfs_rtalloc_query_range_fn)( #ifdef CONFIG_XFS_RT void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args); - -int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block, - int issum); - -static inline int -xfs_rtbitmap_read_buf( - struct xfs_rtalloc_args *args, - xfs_fileoff_t block) -{ - return xfs_rtbuf_get(args, block, 0); -} - -static inline int -xfs_rtsummary_read_buf( - struct xfs_rtalloc_args *args, - xfs_fileoff_t block) -{ - return xfs_rtbuf_get(args, block, 1); -} - +int xfs_rtbitmap_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block); +int xfs_rtsummary_read_buf(struct xfs_rtalloc_args *args, xfs_fileoff_t block); int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat); int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, - xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); + xfs_rtxnum_t *rtblock); int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, @@ -328,8 +292,7 @@ int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len); int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, + xfs_rtxnum_t start, xfs_rtxnum_t end, xfs_rtalloc_query_range_fn fn, void *priv); int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtalloc_query_range_fn fn, @@ -353,16 +316,15 @@ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); -unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, - xfs_rtbxlen_t rtextents); - xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp, unsigned int rsumlevels, xfs_extlen_t rbmblocks); -unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, - unsigned int rsumlevels, xfs_extlen_t rbmblocks); -void xfs_rtbitmap_lock(struct xfs_trans *tp, struct xfs_mount *mp); +int xfs_rtfile_initialize_blocks(struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb, void *data); + +void xfs_rtbitmap_lock(struct xfs_mount *mp); void xfs_rtbitmap_unlock(struct xfs_mount *mp); +void xfs_rtbitmap_trans_join(struct xfs_trans *tp); /* Lock the rt bitmap inode in shared mode */ #define XFS_RBMLOCK_BITMAP (1U << 0) @@ -388,10 +350,9 @@ xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) /* shut up gcc */ return 0; } -# define xfs_rtbitmap_wordcount(mp, r) (0) # define xfs_rtsummary_blockcount(mp, l, b) (0) -# define xfs_rtsummary_wordcount(mp, l, b) (0) -# define xfs_rtbitmap_lock(tp, mp) do { } while (0) +# define xfs_rtbitmap_lock(mp) do { } while (0) +# define xfs_rtbitmap_trans_join(tp) do { } while (0) # define xfs_rtbitmap_unlock(mp) do { } while (0) # define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0) # define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6b56f0f6d4c1..d95409f3cba6 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -232,6 +232,38 @@ xfs_validate_sb_read( return 0; } +static uint64_t +xfs_sb_calc_rbmblocks( + struct xfs_sb *sbp) +{ + return howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); +} + +/* Validate the realtime geometry */ +bool +xfs_validate_rt_geometry( + struct xfs_sb *sbp) +{ + if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || + sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) + return false; + + if (sbp->sb_rblocks == 0) { + if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || + sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) + return false; + return true; + } + + if (sbp->sb_rextents == 0 || + sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) || + sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) || + sbp->sb_rbmblocks != xfs_sb_calc_rbmblocks(sbp)) + return false; + + return true; +} + /* Check all the superblock fields we care about when writing one out. */ STATIC int xfs_validate_sb_write( @@ -491,39 +523,13 @@ xfs_validate_sb_common( } } - /* Validate the realtime geometry; stolen from xfs_repair */ - if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || - sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { + if (!xfs_validate_rt_geometry(sbp)) { xfs_notice(mp, - "realtime extent sanity check failed"); + "realtime %sgeometry check failed", + sbp->sb_rblocks ? "" : "zeroed "); return -EFSCORRUPTED; } - if (sbp->sb_rblocks == 0) { - if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || - sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { - xfs_notice(mp, - "realtime zeroed geometry check failed"); - return -EFSCORRUPTED; - } - } else { - uint64_t rexts; - uint64_t rbmblocks; - - rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); - rbmblocks = howmany_64(sbp->sb_rextents, - NBBY * sbp->sb_blocksize); - - if (!xfs_validate_rtextents(rexts) || - sbp->sb_rextents != rexts || - sbp->sb_rextslog != xfs_compute_rextslog(rexts) || - sbp->sb_rbmblocks != rbmblocks) { - xfs_notice(mp, - "realtime geometry sanity check failed"); - return -EFSCORRUPTED; - } - } - /* * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) * would imply the image is corrupted. @@ -959,6 +965,15 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .verify_write = xfs_sb_write_verify, }; +void +xfs_mount_sb_set_rextsize( + struct xfs_mount *mp, + struct xfs_sb *sbp) +{ + mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); + mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); +} + /* * xfs_mount_common * @@ -983,26 +998,25 @@ xfs_sb_mount_common( mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; - mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); - mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); + xfs_mount_sb_set_rextsize(mp, sbp); - mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; - mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); - mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); + mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; - mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); - mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true); + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 37b1ed1bc209..885c83755991 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -17,6 +17,8 @@ extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern int xfs_sync_sb_buf(struct xfs_mount *mp); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); +void xfs_mount_sb_set_rextsize(struct xfs_mount *mp, + struct xfs_sb *sbp); extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); @@ -38,6 +40,7 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, bool silent); +bool xfs_validate_rt_geometry(struct xfs_sb *sbp); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 45aaf169806a..1a7f95bcf069 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -130,7 +130,7 @@ xfs_calc_inode_res( (4 * sizeof(struct xlog_op_header) + sizeof(struct xfs_inode_log_format) + mp->m_sb.sb_inodesize + - 2 * XFS_BMBT_BLOCK_LEN(mp)); + 2 * xfs_bmbt_block_len(mp)); } /* @@ -918,7 +918,7 @@ xfs_calc_growrtfree_reservation( return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_inode_res(mp, 2) + xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + - xfs_calc_buf_res(1, mp->m_rsumsize); + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, mp->m_rsumblocks)); } /* diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 76eb9e328835..a8cd44d03ef6 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -235,16 +235,4 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); -/* Do we support an rt volume having this number of rtextents? */ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 1e656fab5e41..49dc38acc66b 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -480,7 +480,7 @@ xrep_bmap_iroot_size( { ASSERT(level > 0); - return XFS_BMAP_BROOT_SPACE_CALC(cur->bc_mp, nr_this_level); + return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level); } /* Update the inode counters. */ diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index 3d5f1f6b4b7b..47148cc4a833 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -53,6 +53,11 @@ int xchk_checkpoint_log(struct xfs_mount *mp); bool xchk_should_check_xref(struct xfs_scrub *sc, int *error, struct xfs_btree_cur **curpp); +static inline int xchk_setup_nothing(struct xfs_scrub *sc) +{ + return -ENOENT; +} + /* Setup functions */ int xchk_setup_agheader(struct xfs_scrub *sc); int xchk_setup_fs(struct xfs_scrub *sc); @@ -72,16 +77,8 @@ int xchk_setup_dirtree(struct xfs_scrub *sc); int xchk_setup_rtbitmap(struct xfs_scrub *sc); int xchk_setup_rtsummary(struct xfs_scrub *sc); #else -static inline int -xchk_setup_rtbitmap(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_setup_rtsummary(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_setup_rtbitmap xchk_setup_nothing +# define xchk_setup_rtsummary xchk_setup_nothing #endif #ifdef CONFIG_XFS_QUOTA int xchk_ino_dqattach(struct xfs_scrub *sc); @@ -93,16 +90,8 @@ xchk_ino_dqattach(struct xfs_scrub *sc) { return 0; } -static inline int -xchk_setup_quota(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_setup_quotacheck(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_setup_quota xchk_setup_nothing +# define xchk_setup_quotacheck xchk_setup_nothing #endif int xchk_setup_fscounters(struct xfs_scrub *sc); int xchk_setup_nlinks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index daf9f1ee7c2c..3e45b9b72312 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -846,7 +846,7 @@ xrep_dinode_bad_bmbt_fork( nrecs = be16_to_cpu(dfp->bb_numrecs); level = be16_to_cpu(dfp->bb_level); - if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size) + if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size) return true; if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork)) return true; @@ -858,12 +858,12 @@ xrep_dinode_bad_bmbt_fork( xfs_fileoff_t fileoff; xfs_fsblock_t fsbno; - fkp = XFS_BMDR_KEY_ADDR(dfp, i); + fkp = xfs_bmdr_key_addr(dfp, i); fileoff = be64_to_cpu(fkp->br_startoff); if (!xfs_verify_fileoff(sc->mp, fileoff)) return true; - fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr); + fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr); fsbno = be64_to_cpu(*fpp); if (!xfs_verify_fsbno(sc->mp, fsbno)) return true; @@ -1121,7 +1121,7 @@ xrep_dinode_ensure_forkoff( struct xfs_bmdr_block *bmdr; struct xfs_scrub *sc = ri->sc; xfs_extnum_t attr_extents, data_extents; - size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1); + size_t bmdr_minsz = xfs_bmdr_space_calc(1); unsigned int lit_sz = XFS_LITINO(sc->mp); unsigned int afork_min, dfork_min; @@ -1173,7 +1173,7 @@ xrep_dinode_ensure_forkoff( case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */ bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); - afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + afork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; default: /* We should never see any other formats. */ @@ -1223,7 +1223,7 @@ xrep_dinode_ensure_forkoff( case XFS_DINODE_FMT_BTREE: /* Must have space for btree header and key/pointers. */ bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); - dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); + dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; default: dfork_min = 0; diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 3fee603f5244..7c7366c98338 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -63,7 +63,8 @@ xchk_setup_rtsummary( * us to avoid pinning kernel memory for this purpose. */ descr = xchk_xfile_descr(sc, "realtime summary file"); - error = xfile_create(descr, mp->m_rsumsize, &sc->xfile); + error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks), + &sc->xfile); kfree(descr); if (error) return error; @@ -95,16 +96,14 @@ xchk_setup_rtsummary( * volume. Hence it is safe to compute and check the geometry values. */ if (mp->m_sb.sb_rblocks) { - xfs_filblks_t rsumblocks; int rextslog; rts->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks); rextslog = xfs_compute_rextslog(rts->rextents); rts->rsumlevels = rextslog + 1; rts->rbmblocks = xfs_rtbitmap_blockcount(mp, rts->rextents); - rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels, + rts->rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels, rts->rbmblocks); - rts->rsumsize = XFS_FSB_TO_B(mp, rsumblocks); } return 0; } @@ -316,7 +315,7 @@ xchk_rtsummary( } /* Is m_rsumsize correct? */ - if (mp->m_rsumsize != rts->rsumsize) { + if (mp->m_rsumblocks != rts->rsumblocks) { xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino); goto out_rbm; } @@ -332,7 +331,7 @@ xchk_rtsummary( * growfsrt expands the summary file before updating sb_rextents, so * the file can be larger than rsumsize. */ - if (mp->m_rsumip->i_disk_size < rts->rsumsize) { + if (mp->m_rsumip->i_disk_size < XFS_FSB_TO_B(mp, rts->rsumblocks)) { xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino); goto out_rbm; } diff --git a/fs/xfs/scrub/rtsummary.h b/fs/xfs/scrub/rtsummary.h index e1d50304d8d4..e44b04cb6e2d 100644 --- a/fs/xfs/scrub/rtsummary.h +++ b/fs/xfs/scrub/rtsummary.h @@ -14,7 +14,7 @@ struct xchk_rtsummary { uint64_t rextents; uint64_t rbmblocks; - uint64_t rsumsize; + xfs_filblks_t rsumblocks; unsigned int rsumlevels; unsigned int resblks; diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c index d9e971c4c79f..7deeb948cb70 100644 --- a/fs/xfs/scrub/rtsummary_repair.c +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -56,7 +56,7 @@ xrep_setup_rtsummary( * transaction (which we cannot drop because we cannot drop the * rtsummary ILOCK) and cannot ask for more reservation. */ - blocks = XFS_B_TO_FSB(mp, mp->m_rsumsize); + blocks = mp->m_rsumblocks; blocks += xfs_bmbt_calc_size(mp, blocks) * 2; if (blocks > UINT_MAX) return -EOPNOTSUPP; @@ -100,7 +100,6 @@ xrep_rtsummary( { struct xchk_rtsummary *rts = sc->buf; struct xfs_mount *mp = sc->mp; - xfs_filblks_t rsumblocks; int error; /* We require the rmapbt to rebuild anything. */ @@ -131,10 +130,9 @@ xrep_rtsummary( } /* Make sure we have space allocated for the entire summary file. */ - rsumblocks = XFS_B_TO_FSB(mp, rts->rsumsize); xfs_trans_ijoin(sc->tp, sc->ip, 0); xfs_trans_ijoin(sc->tp, sc->tempip, 0); - error = xrep_tempfile_prealloc(sc, 0, rsumblocks); + error = xrep_tempfile_prealloc(sc, 0, rts->rsumblocks); if (error) return error; @@ -143,11 +141,11 @@ xrep_rtsummary( return error; /* Copy the rtsummary file that we generated. */ - error = xrep_tempfile_copyin(sc, 0, rsumblocks, + error = xrep_tempfile_copyin(sc, 0, rts->rsumblocks, xrep_rtsummary_prep_buf, rts); if (error) return error; - error = xrep_tempfile_set_isize(sc, rts->rsumsize); + error = xrep_tempfile_set_isize(sc, XFS_FSB_TO_B(mp, rts->rsumblocks)); if (error) return error; @@ -168,7 +166,7 @@ xrep_rtsummary( memset(mp->m_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks); mp->m_rsumlevels = rts->rsumlevels; - mp->m_rsumsize = rts->rsumsize; + mp->m_rsumblocks = rts->rsumblocks; /* Free the old rtsummary blocks if they're not in use. */ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 1bc33f010d0e..5993fcaffb2c 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -231,6 +231,11 @@ xchk_should_terminate( return false; } +static inline int xchk_nothing(struct xfs_scrub *sc) +{ + return -ENOENT; +} + /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); int xchk_superblock(struct xfs_scrub *sc); @@ -254,31 +259,15 @@ int xchk_dirtree(struct xfs_scrub *sc); int xchk_rtbitmap(struct xfs_scrub *sc); int xchk_rtsummary(struct xfs_scrub *sc); #else -static inline int -xchk_rtbitmap(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_rtsummary(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_rtbitmap xchk_nothing +# define xchk_rtsummary xchk_nothing #endif #ifdef CONFIG_XFS_QUOTA int xchk_quota(struct xfs_scrub *sc); int xchk_quotacheck(struct xfs_scrub *sc); #else -static inline int -xchk_quota(struct xfs_scrub *sc) -{ - return -ENOENT; -} -static inline int -xchk_quotacheck(struct xfs_scrub *sc) -{ - return -ENOENT; -} +# define xchk_quota xchk_nothing +# define xchk_quotacheck xchk_nothing #endif int xchk_fscounters(struct xfs_scrub *sc); int xchk_nlinks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index d390d56cd875..177f922acfaf 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -88,7 +88,7 @@ xrep_tempfile_create( goto out_release_dquots; /* Allocate inode, set up directory. */ - error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (error) goto out_trans_cancel; error = xfs_icreate(tp, ino, &args, &sc->tempip); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e224b49b7cff..35a8c1b8b3cb 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -346,6 +346,17 @@ xfs_bmap_defer_add( trace_xfs_bmap_defer(bi); xfs_bmap_update_get_group(tp->t_mountp, bi); + + /* + * Ensure the deferred mapping is pre-recorded in i_delayed_blks. + * + * Otherwise stat can report zero blocks for an inode that actually has + * data when the entire mapping is in the process of being overwritten + * using the out of place write path. This is undone in xfs_bmapi_remap + * after it has incremented di_nblocks for a successful operation. + */ + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount; xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type); } @@ -367,6 +378,9 @@ xfs_bmap_update_cancel_item( { struct xfs_bmap_intent *bi = bi_entry(item); + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks -= bi->bi_bmap.br_blockcount; + xfs_bmap_update_put_group(bi); kmem_cache_free(xfs_bmap_intent_cache, bi); } @@ -464,6 +478,9 @@ xfs_bui_recover_work( bi->bi_owner = *ipp; xfs_bmap_update_get_group(mp, bi); + /* see xfs_bmap_defer_add for details */ + if (bi->bi_type == XFS_BMAP_MAP) + bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount; xfs_defer_add_item(dfp, &bi->bi_list); return bi; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e9fdebaa40ea..053d567c9108 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -331,8 +331,7 @@ xfs_getbmap( } if (xfs_get_extsz_hint(ip) || - (ip->i_diflags & - (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))) + (ip->i_diflags & XFS_DIFLAG_PREALLOC)) max_len = mp->m_super->s_maxbytes; else max_len = XFS_ISIZE(ip); @@ -492,12 +491,12 @@ bool xfs_can_free_eofblocks( struct xfs_inode *ip) { - struct xfs_bmbt_irec imap; struct xfs_mount *mp = ip->i_mount; + bool found_blocks = false; xfs_fileoff_t end_fsb; xfs_fileoff_t last_fsb; - int nimaps = 1; - int error; + struct xfs_bmbt_irec imap; + struct xfs_iext_cursor icur; /* * Caller must either hold the exclusive io lock; or be inactivating @@ -524,12 +523,11 @@ xfs_can_free_eofblocks( return false; /* - * Only free real extents for inodes with persistent preallocations or - * the append-only flag. + * Do not free real extents in preallocated files unless the file has + * delalloc blocks and we are forced to remove them. */ - if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) - if (ip->i_delayed_blks == 0) - return false; + if ((ip->i_diflags & XFS_DIFLAG_PREALLOC) && !ip->i_delayed_blks) + return false; /* * Do not try to free post-EOF blocks if EOF is beyond the end of the @@ -544,21 +542,13 @@ xfs_can_free_eofblocks( return false; /* - * Look up the mapping for the first block past EOF. If we can't find - * it, there's nothing to free. + * Check if there is an post-EOF extent to free. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps, - 0); + if (xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap)) + found_blocks = true; xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (error || nimaps == 0) - return false; - - /* - * If there's a real mapping there or there are delayed allocation - * reservations, then we have post-EOF blocks to try to free. - */ - return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks; + return found_blocks; } /* @@ -1195,7 +1185,7 @@ xfs_swap_extents_check_format( */ if (tifp->if_format == XFS_DINODE_FMT_BTREE) { if (xfs_inode_has_attr_fork(ip) && - XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip)) + xfs_bmap_bmdr_space(tifp->if_broot) > xfs_inode_fork_boff(ip)) return -EINVAL; if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) return -EINVAL; @@ -1204,7 +1194,7 @@ xfs_swap_extents_check_format( /* Reciprocal target->temp btree format checks */ if (ifp->if_format == XFS_DINODE_FMT_BTREE) { if (xfs_inode_has_attr_fork(tip) && - XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip)) + xfs_bmap_bmdr_space(ip->i_df.if_broot) > xfs_inode_fork_boff(tip)) return -EINVAL; if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) return -EINVAL; diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 25f5dffeab2a..d8c4a5dcca7a 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -554,11 +554,10 @@ xfs_trim_rtdev_extents( xfs_daddr_t end, xfs_daddr_t minlen) { - struct xfs_rtalloc_rec low = { }; - struct xfs_rtalloc_rec high = { }; struct xfs_trim_rtdev tr = { .minlen_fsb = XFS_BB_TO_FSB(mp, minlen), }; + xfs_rtxnum_t low, high; struct xfs_trans *tp; xfs_daddr_t rtdev_daddr; int error; @@ -584,17 +583,17 @@ xfs_trim_rtdev_extents( XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1); /* Convert the rt blocks to rt extents */ - low.ar_startext = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start)); - high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end)); + low = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start)); + high = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end)); /* * Walk the free ranges between low and high. The query_range function * trims the extents returned. */ do { - tr.stop_rtx = low.ar_startext + (mp->m_sb.sb_blocksize * NBBY); + tr.stop_rtx = low + (mp->m_sb.sb_blocksize * NBBY); xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); - error = xfs_rtalloc_query_range(mp, tp, &low, &high, + error = xfs_rtalloc_query_range(mp, tp, low, high, xfs_trim_gather_rtextent, &tr); if (error == -ECANCELED) @@ -615,8 +614,8 @@ xfs_trim_rtdev_extents( if (error) break; - low.ar_startext = tr.restart_rtx; - } while (!xfs_trim_should_stop() && low.ar_startext <= high.ar_startext); + low = tr.restart_rtx; + } while (!xfs_trim_should_stop() && low <= high); xfs_trans_cancel(tp); return error; @@ -708,7 +707,7 @@ xfs_ioc_trim( return last_error; range.len = min_t(unsigned long long, range.len, - XFS_FSB_TO_B(mp, max_blocks)); + XFS_FSB_TO_B(mp, max_blocks) - range.start); if (copy_to_user(urange, &range, sizeof(range))) return -EFAULT; return 0; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index c8a655c92c92..d0889190ab7f 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -72,6 +72,34 @@ xfs_exchrange_estimate( return error; } +/* + * Check that file2's metadata agree with the snapshot that we took for the + * range commit request. + * + * This should be called after the filesystem has locked /all/ inode metadata + * against modification. + */ +STATIC int +xfs_exchrange_check_freshness( + const struct xfs_exchrange *fxr, + struct xfs_inode *ip2) +{ + struct inode *inode2 = VFS_I(ip2); + struct timespec64 ctime = inode_get_ctime(inode2); + struct timespec64 mtime = inode_get_mtime(inode2); + + trace_xfs_exchrange_freshness(fxr, ip2); + + /* Check that file2 hasn't otherwise been modified. */ + if (fxr->file2_ino != ip2->i_ino || + fxr->file2_gen != inode2->i_generation || + !timespec64_equal(&fxr->file2_ctime, &ctime) || + !timespec64_equal(&fxr->file2_mtime, &mtime)) + return -EBUSY; + + return 0; +} + #define QRETRY_IP1 (0x1) #define QRETRY_IP2 (0x2) @@ -607,6 +635,12 @@ xfs_exchrange_prep( if (error || fxr->length == 0) return error; + if (fxr->flags & __XFS_EXCHANGE_RANGE_CHECK_FRESH2) { + error = xfs_exchrange_check_freshness(fxr, ip2); + if (error) + return error; + } + /* Attach dquots to both inodes before changing block maps. */ error = xfs_qm_dqattach(ip2); if (error) @@ -719,7 +753,8 @@ xfs_exchange_range( if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) return -EXDEV; - if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS | + __XFS_EXCHANGE_RANGE_CHECK_FRESH2)) return -EINVAL; /* Userspace requests only honored for regular files. */ @@ -802,3 +837,109 @@ xfs_ioc_exchange_range( fdput(file1); return error; } + +/* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */ +struct xfs_commit_range_fresh { + xfs_fsid_t fsid; /* m_fixedfsid */ + __u64 file2_ino; /* inode number */ + __s64 file2_mtime; /* modification time */ + __s64 file2_ctime; /* change time */ + __s32 file2_mtime_nsec; /* mod time, nsec */ + __s32 file2_ctime_nsec; /* change time, nsec */ + __u32 file2_gen; /* inode generation */ + __u32 magic; /* zero */ +}; +#define XCR_FRESH_MAGIC 0x444F524B /* DORK */ + +/* Set up a commitrange operation by sampling file2's write-related attrs */ +long +xfs_ioc_start_commit( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_commit_range args = { }; + struct timespec64 ts; + struct xfs_commit_range_fresh *kern_f; + struct xfs_commit_range_fresh __user *user_f; + struct inode *inode2 = file_inode(file); + struct xfs_inode *ip2 = XFS_I(inode2); + const unsigned int lockflags = XFS_IOLOCK_SHARED | + XFS_MMAPLOCK_SHARED | + XFS_ILOCK_SHARED; + + BUILD_BUG_ON(sizeof(struct xfs_commit_range_fresh) != + sizeof(args.file2_freshness)); + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + xfs_ilock(ip2, lockflags); + ts = inode_get_ctime(inode2); + kern_f->file2_ctime = ts.tv_sec; + kern_f->file2_ctime_nsec = ts.tv_nsec; + ts = inode_get_mtime(inode2); + kern_f->file2_mtime = ts.tv_sec; + kern_f->file2_mtime_nsec = ts.tv_nsec; + kern_f->file2_ino = ip2->i_ino; + kern_f->file2_gen = inode2->i_generation; + kern_f->magic = XCR_FRESH_MAGIC; + xfs_iunlock(ip2, lockflags); + + user_f = (struct xfs_commit_range_fresh __user *)&argp->file2_freshness; + if (copy_to_user(user_f, kern_f, sizeof(*kern_f))) + return -EFAULT; + + return 0; +} + +/* + * Exchange file1 and file2 contents if file2 has not been written since the + * start commit operation. + */ +long +xfs_ioc_commit_range( + struct file *file, + struct xfs_commit_range __user *argp) +{ + struct xfs_exchrange fxr = { + .file2 = file, + }; + struct xfs_commit_range args; + struct xfs_commit_range_fresh *kern_f; + struct xfs_inode *ip2 = XFS_I(file_inode(file)); + struct xfs_mount *mp = ip2->i_mount; + struct fd file1; + int error; + + kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + if (kern_f->magic != XCR_FRESH_MAGIC) + return -EBUSY; + if (memcmp(&kern_f->fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t))) + return -EBUSY; + + fxr.file1_offset = args.file1_offset; + fxr.file2_offset = args.file2_offset; + fxr.length = args.length; + fxr.flags = args.flags | __XFS_EXCHANGE_RANGE_CHECK_FRESH2; + fxr.file2_ino = kern_f->file2_ino; + fxr.file2_gen = kern_f->file2_gen; + fxr.file2_mtime.tv_sec = kern_f->file2_mtime; + fxr.file2_mtime.tv_nsec = kern_f->file2_mtime_nsec; + fxr.file2_ctime.tv_sec = kern_f->file2_ctime; + fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec; + + file1 = fdget(args.file1_fd); + if (!file1.file) + return -EBADF; + fxr.file1 = file1.file; + + error = xfs_exchange_range(&fxr); + fdput(file1); + return error; +} diff --git a/fs/xfs/xfs_exchrange.h b/fs/xfs/xfs_exchrange.h index 039abcca546e..bc1298aba806 100644 --- a/fs/xfs/xfs_exchrange.h +++ b/fs/xfs/xfs_exchrange.h @@ -10,8 +10,12 @@ #define __XFS_EXCHANGE_RANGE_UPD_CMTIME1 (1ULL << 63) #define __XFS_EXCHANGE_RANGE_UPD_CMTIME2 (1ULL << 62) +/* Freshness check required */ +#define __XFS_EXCHANGE_RANGE_CHECK_FRESH2 (1ULL << 61) + #define XFS_EXCHANGE_RANGE_PRIV_FLAGS (__XFS_EXCHANGE_RANGE_UPD_CMTIME1 | \ - __XFS_EXCHANGE_RANGE_UPD_CMTIME2) + __XFS_EXCHANGE_RANGE_UPD_CMTIME2 | \ + __XFS_EXCHANGE_RANGE_CHECK_FRESH2) struct xfs_exchrange { struct file *file1; @@ -22,10 +26,20 @@ struct xfs_exchrange { u64 length; u64 flags; /* XFS_EXCHANGE_RANGE flags */ + + /* file2 metadata for freshness checks */ + u64 file2_ino; + struct timespec64 file2_mtime; + struct timespec64 file2_ctime; + u32 file2_gen; }; long xfs_ioc_exchange_range(struct file *file, struct xfs_exchange_range __user *argp); +long xfs_ioc_start_commit(struct file *file, + struct xfs_commit_range __user *argp); +long xfs_ioc_commit_range(struct file *file, + struct xfs_commit_range __user *argp); struct xfs_exchmaps_req; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f6e4912769a0..e97d789495a5 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1238,12 +1238,78 @@ xfs_dir_open( return error; } +/* + * Don't bother propagating errors. We're just doing cleanup, and the caller + * ignores the return value anyway. + */ STATIC int xfs_file_release( - struct inode *inode, - struct file *filp) + struct inode *inode, + struct file *file) { - return xfs_release(XFS_I(inode)); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + /* + * If this is a read-only mount or the file system has been shut down, + * don't generate I/O. + */ + if (xfs_is_readonly(mp) || xfs_is_shutdown(mp)) + return 0; + + /* + * If we previously truncated this file and removed old data in the + * process, we want to initiate "early" writeout on the last close. + * This is an attempt to combat the notorious NULL files problem which + * is particularly noticeable from a truncate down, buffered (re-)write + * (delalloc), followed by a crash. What we are effectively doing here + * is significantly reducing the time window where we'd otherwise be + * exposed to that problem. + */ + if (xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED)) { + xfs_iflags_clear(ip, XFS_EOFBLOCKS_RELEASED); + if (ip->i_delayed_blks > 0) + filemap_flush(inode->i_mapping); + } + + /* + * XFS aggressively preallocates post-EOF space to generate contiguous + * allocations for writers that append to the end of the file. + * + * To support workloads that close and reopen the file frequently, these + * preallocations usually persist after a close unless it is the first + * close for the inode. This is a tradeoff to generate tightly packed + * data layouts for unpacking tarballs or similar archives that write + * one file after another without going back to it while keeping the + * preallocation for files that have recurring open/write/close cycles. + * + * This heuristic is skipped for inodes with the append-only flag as + * that flag is rather pointless for inodes written only once. + * + * There is no point in freeing blocks here for open but unlinked files + * as they will be taken care of by the inactivation path soon. + * + * When releasing a read-only context, don't flush data or trim post-EOF + * blocks. This avoids open/read/close workloads from removing EOF + * blocks that other writers depend upon to reduce fragmentation. + * + * If we can't get the iolock just skip truncating the blocks past EOF + * because we could deadlock with the mmap_lock otherwise. We'll get + * another chance to drop them once the last reference to the inode is + * dropped, so we'll never leak blocks permanently. + */ + if (inode->i_nlink && + (file->f_mode & FMODE_WRITE) && + !(ip->i_diflags & XFS_DIFLAG_APPEND) && + !xfs_iflags_test(ip, XFS_EOFBLOCKS_RELEASED) && + xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (xfs_can_free_eofblocks(ip) && + !xfs_iflags_test_and_set(ip, XFS_EOFBLOCKS_RELEASED)) + xfs_free_eofblocks(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + } + + return 0; } STATIC int diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 71f32354944e..ae18ab86e608 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -44,7 +44,7 @@ xfs_fsmap_from_internal( } /* Convert an fsmap to an xfs_fsmap. */ -void +static void xfs_fsmap_to_internal( struct xfs_fsmap *dest, struct fsmap *src) @@ -441,141 +441,6 @@ xfs_getfsmap_set_irec_flags( irec->rm_flags |= XFS_RMAP_UNWRITTEN; } -/* Execute a getfsmap query against the log device. */ -STATIC int -xfs_getfsmap_logdev( - struct xfs_trans *tp, - const struct xfs_fsmap *keys, - struct xfs_getfsmap_info *info) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_rmap_irec rmap; - xfs_daddr_t rec_daddr, len_daddr; - xfs_fsblock_t start_fsb, end_fsb; - uint64_t eofs; - - eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); - if (keys[0].fmr_physical >= eofs) - return 0; - start_fsb = XFS_BB_TO_FSBT(mp, - keys[0].fmr_physical + keys[0].fmr_length); - end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); - - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) - info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); - - trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); - trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); - - if (start_fsb > 0) - return 0; - - /* Fabricate an rmap entry for the external log device. */ - rmap.rm_startblock = 0; - rmap.rm_blockcount = mp->m_sb.sb_logblocks; - rmap.rm_owner = XFS_RMAP_OWN_LOG; - rmap.rm_offset = 0; - rmap.rm_flags = 0; - - rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); - len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); - return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); -} - -#ifdef CONFIG_XFS_RT -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_mount *mp, - struct xfs_trans *tp, - const struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_rtblock_t rtbno; - xfs_daddr_t rec_daddr, len_daddr; - - rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); - rec_daddr = XFS_FSB_TO_BB(mp, rtbno); - irec.rm_startblock = rtbno; - - rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); - len_daddr = XFS_FSB_TO_BB(mp, rtbno); - irec.rm_blockcount = rtbno; - - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); -} - -/* Execute a getfsmap query against the realtime device rtbitmap. */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap( - struct xfs_trans *tp, - const struct xfs_fsmap *keys, - struct xfs_getfsmap_info *info) -{ - - struct xfs_rtalloc_rec alow = { 0 }; - struct xfs_rtalloc_rec ahigh = { 0 }; - struct xfs_mount *mp = tp->t_mountp; - xfs_rtblock_t start_rtb; - xfs_rtblock_t end_rtb; - uint64_t eofs; - int error; - - eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); - if (keys[0].fmr_physical >= eofs) - return 0; - start_rtb = XFS_BB_TO_FSBT(mp, - keys[0].fmr_physical + keys[0].fmr_length); - end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); - - info->missing_owner = XFS_FMR_OWN_UNKNOWN; - - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) { - info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); - if (info->low_daddr >= eofs) - return 0; - } - - trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); - trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); - - xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); - - /* - * Set up query parameters to return free rtextents covering the range - * we want. - */ - alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb); - ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb); - error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh, - xfs_getfsmap_rtdev_rtbitmap_helper, info); - if (error) - goto err; - - /* - * Report any gaps at the end of the rtbitmap by simulating a null - * rmap starting at the block after the end of the query range. - */ - info->last = true; - ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext); - - error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info); - if (error) - goto err; -err: - xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); - return error; -} -#endif /* CONFIG_XFS_RT */ - static inline bool rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) { @@ -800,6 +665,140 @@ xfs_getfsmap_datadev_bnobt( xfs_getfsmap_datadev_bnobt_query, &akeys[0]); } +/* Execute a getfsmap query against the log device. */ +STATIC int +xfs_getfsmap_logdev( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; + xfs_daddr_t rec_daddr, len_daddr; + xfs_fsblock_t start_fsb, end_fsb; + uint64_t eofs; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); + if (keys[0].fmr_physical >= eofs) + return 0; + start_fsb = XFS_BB_TO_FSBT(mp, + keys[0].fmr_physical + keys[0].fmr_length); + end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fmr_length > 0) + info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); + + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); + + if (start_fsb > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. */ + rmap.rm_startblock = 0; + rmap.rm_blockcount = mp->m_sb.sb_logblocks; + rmap.rm_owner = XFS_RMAP_OWN_LOG; + rmap.rm_offset = 0; + rmap.rm_flags = 0; + + rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); + len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); + return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); +} + +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_rtblock_t rtbno; + xfs_daddr_t rec_daddr, len_daddr; + + rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext); + rec_daddr = XFS_FSB_TO_BB(mp, rtbno); + irec.rm_startblock = rtbno; + + rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount); + len_daddr = XFS_FSB_TO_BB(mp, rtbno); + irec.rm_blockcount = rtbno; + + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); +} + +/* Execute a getfsmap query against the realtime device rtbitmap. */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, + struct xfs_getfsmap_info *info) +{ + + struct xfs_rtalloc_rec ahigh = { 0 }; + struct xfs_mount *mp = tp->t_mountp; + xfs_rtblock_t start_rtb; + xfs_rtblock_t end_rtb; + xfs_rtxnum_t high; + uint64_t eofs; + int error; + + eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents)); + if (keys[0].fmr_physical >= eofs) + return 0; + start_rtb = XFS_BB_TO_FSBT(mp, + keys[0].fmr_physical + keys[0].fmr_length); + end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + info->missing_owner = XFS_FMR_OWN_UNKNOWN; + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fmr_length > 0) { + info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); + if (info->low_daddr >= eofs) + return 0; + } + + trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); + trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); + + xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP); + + /* + * Set up query parameters to return free rtextents covering the range + * we want. + */ + high = xfs_rtb_to_rtxup(mp, end_rtb); + error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb), + high, xfs_getfsmap_rtdev_rtbitmap_helper, info); + if (error) + goto err; + + /* + * Report any gaps at the end of the rtbitmap by simulating a null + * rmap starting at the block after the end of the query range. + */ + info->last = true; + ahigh.ar_startext = min(mp->m_sb.sb_rextents, high); + + error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info); + if (error) + goto err; +err: + xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP); + return error; +} +#endif /* CONFIG_XFS_RT */ + /* Do we recognize the device? */ STATIC bool xfs_getfsmap_is_valid_device( @@ -890,7 +889,7 @@ xfs_getfsmap_check_keys( * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from * dkeys; used to query the metadata. */ -int +STATIC int xfs_getfsmap( struct xfs_mount *mp, struct xfs_fsmap_head *head, @@ -1020,3 +1019,133 @@ xfs_getfsmap( head->fmh_oflags = FMH_OF_DEV_T; return error; } + +int +xfs_ioc_getfsmap( + struct xfs_inode *ip, + struct fsmap_head __user *arg) +{ + struct xfs_fsmap_head xhead = {0}; + struct fsmap_head head; + struct fsmap *recs; + unsigned int count; + __u32 last_flags = 0; + bool done = false; + int error; + + if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) + return -EFAULT; + if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || + memchr_inv(head.fmh_keys[0].fmr_reserved, 0, + sizeof(head.fmh_keys[0].fmr_reserved)) || + memchr_inv(head.fmh_keys[1].fmr_reserved, 0, + sizeof(head.fmh_keys[1].fmr_reserved))) + return -EINVAL; + + /* + * Use an internal memory buffer so that we don't have to copy fsmap + * data to userspace while holding locks. Start by trying to allocate + * up to 128k for the buffer, but fall back to a single page if needed. + */ + count = min_t(unsigned int, head.fmh_count, + 131072 / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) { + count = min_t(unsigned int, head.fmh_count, + PAGE_SIZE / sizeof(struct fsmap)); + recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); + if (!recs) + return -ENOMEM; + } + + xhead.fmh_iflags = head.fmh_iflags; + xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); + xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); + + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); + + head.fmh_entries = 0; + do { + struct fsmap __user *user_recs; + struct fsmap *last_rec; + + user_recs = &arg->fmh_recs[head.fmh_entries]; + xhead.fmh_entries = 0; + xhead.fmh_count = min_t(unsigned int, count, + head.fmh_count - head.fmh_entries); + + /* Run query, record how many entries we got. */ + error = xfs_getfsmap(ip->i_mount, &xhead, recs); + switch (error) { + case 0: + /* + * There are no more records in the result set. Copy + * whatever we got to userspace and break out. + */ + done = true; + break; + case -ECANCELED: + /* + * The internal memory buffer is full. Copy whatever + * records we got to userspace and go again if we have + * not yet filled the userspace buffer. + */ + error = 0; + break; + default: + goto out_free; + } + head.fmh_entries += xhead.fmh_entries; + head.fmh_oflags = xhead.fmh_oflags; + + /* + * If the caller wanted a record count or there aren't any + * new records to return, we're done. + */ + if (head.fmh_count == 0 || xhead.fmh_entries == 0) + break; + + /* Copy all the records we got out to userspace. */ + if (copy_to_user(user_recs, recs, + xhead.fmh_entries * sizeof(struct fsmap))) { + error = -EFAULT; + goto out_free; + } + + /* Remember the last record flags we copied to userspace. */ + last_rec = &recs[xhead.fmh_entries - 1]; + last_flags = last_rec->fmr_flags; + + /* Set up the low key for the next iteration. */ + xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); + trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); + } while (!done && head.fmh_entries < head.fmh_count); + + /* + * If there are no more records in the query result set and we're not + * in counting mode, mark the last record returned with the LAST flag. + */ + if (done && head.fmh_count > 0 && head.fmh_entries > 0) { + struct fsmap __user *user_rec; + + last_flags |= FMR_OF_LAST; + user_rec = &arg->fmh_recs[head.fmh_entries - 1]; + + if (copy_to_user(&user_rec->fmr_flags, &last_flags, + sizeof(last_flags))) { + error = -EFAULT; + goto out_free; + } + } + + /* copy back header */ + if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { + error = -EFAULT; + goto out_free; + } + +out_free: + kvfree(recs); + return error; +} diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h index a0775788e7b1..a0bcc38486a5 100644 --- a/fs/xfs/xfs_fsmap.h +++ b/fs/xfs/xfs_fsmap.h @@ -7,6 +7,7 @@ #define __XFS_FSMAP_H__ struct fsmap; +struct fsmap_head; /* internal fsmap representation */ struct xfs_fsmap { @@ -27,9 +28,6 @@ struct xfs_fsmap_head { struct xfs_fsmap fmh_keys[2]; /* low and high keys */ }; -void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src); - -int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head, - struct fsmap *out_recs); +int xfs_ioc_getfsmap(struct xfs_inode *ip, struct fsmap_head __user *arg); #endif /* __XFS_FSMAP_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c211ea2b63c4..3643cc843f62 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -485,7 +485,7 @@ xfs_do_force_shutdown( const char *why; - if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) { + if (xfs_set_shutdown(mp)) { xlog_shutdown_wait(mp->m_log); return; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index cf629302d48e..20d9924f28c2 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -65,6 +65,18 @@ static int xfs_icwalk_ag(struct xfs_perag *pag, XFS_ICWALK_FLAG_RECLAIM_SICK | \ XFS_ICWALK_FLAG_UNION) +/* Marks for the perag xarray */ +#define XFS_PERAG_RECLAIM_MARK XA_MARK_0 +#define XFS_PERAG_BLOCKGC_MARK XA_MARK_1 + +static inline xa_mark_t ici_tag_to_mark(unsigned int tag) +{ + if (tag == XFS_ICI_RECLAIM_TAG) + return XFS_PERAG_RECLAIM_MARK; + ASSERT(tag == XFS_ICI_BLOCKGC_TAG); + return XFS_PERAG_BLOCKGC_MARK; +} + /* * Allocate and initialise an xfs_inode. */ @@ -191,7 +203,7 @@ xfs_reclaim_work_queue( { rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + if (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) { queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); } @@ -241,9 +253,7 @@ xfs_perag_set_inode_tag( return; /* propagate the tag up into the perag radix tree */ - spin_lock(&mp->m_perag_lock); - radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, tag); - spin_unlock(&mp->m_perag_lock); + xa_set_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag)); /* start background work */ switch (tag) { @@ -285,14 +295,39 @@ xfs_perag_clear_inode_tag( return; /* clear the tag from the perag radix tree */ - spin_lock(&mp->m_perag_lock); - radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag); - spin_unlock(&mp->m_perag_lock); + xa_clear_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag)); trace_xfs_perag_clear_inode_tag(pag, _RET_IP_); } /* + * Find the next AG after @pag, or the first AG if @pag is NULL. + */ +static struct xfs_perag * +xfs_perag_grab_next_tag( + struct xfs_mount *mp, + struct xfs_perag *pag, + int tag) +{ + unsigned long index = 0; + + if (pag) { + index = pag->pag_agno + 1; + xfs_perag_rele(pag); + } + + rcu_read_lock(); + pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag)); + if (pag) { + trace_xfs_perag_grab_next_tag(pag, _RET_IP_); + if (!atomic_inc_not_zero(&pag->pag_active_ref)) + pag = NULL; + } + rcu_read_unlock(); + return pag; +} + +/* * When we recycle a reclaimable inode, we need to re-initialise the VFS inode * part of the structure. This is made more complex by the fact we store * information about the on-disk values in the VFS inode and so we can't just @@ -755,7 +790,7 @@ xfs_iget( ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); /* reject inode numbers outside existing AGs */ - if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) + if (!xfs_verify_ino(mp, ino)) return -EINVAL; XFS_STATS_INC(mp, xs_ig_attempts); @@ -977,7 +1012,7 @@ xfs_reclaim_inodes( if (xfs_want_reclaim_sick(mp)) icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK; - while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + while (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) { xfs_ail_push_all_sync(mp->m_ail); xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw); } @@ -1019,15 +1054,17 @@ long xfs_reclaim_inodes_count( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t ag = 0; + XA_STATE (xas, &mp->m_perags, 0); long reclaimable = 0; + struct xfs_perag *pag; - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - ag = pag->pag_agno + 1; + rcu_read_lock(); + xas_for_each_marked(&xas, pag, ULONG_MAX, XFS_PERAG_RECLAIM_MARK) { + trace_xfs_reclaim_inodes_count(pag, _THIS_IP_); reclaimable += pag->pag_ici_reclaimable; - xfs_perag_put(pag); } + rcu_read_unlock(); + return reclaimable; } @@ -1159,7 +1196,7 @@ xfs_inode_free_eofblocks( if (xfs_can_free_eofblocks(ip)) return xfs_free_eofblocks(ip); - /* inode could be preallocated or append-only */ + /* inode could be preallocated */ trace_xfs_inode_free_eofblocks_invalid(ip); xfs_inode_clear_eofblocks_tag(ip); return 0; @@ -1369,14 +1406,13 @@ void xfs_blockgc_start( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; if (xfs_set_blockgc_enabled(mp)) return; trace_xfs_blockgc_start(mp, __return_address); - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) xfs_blockgc_queue(pag); } @@ -1492,21 +1528,19 @@ int xfs_blockgc_flush_all( struct xfs_mount *mp) { - struct xfs_perag *pag; - xfs_agnumber_t agno; + struct xfs_perag *pag = NULL; trace_xfs_blockgc_flush_all(mp, __return_address); /* - * For each blockgc worker, move its queue time up to now. If it - * wasn't queued, it will not be requeued. Then flush whatever's - * left. + * For each blockgc worker, move its queue time up to now. If it wasn't + * queued, it will not be requeued. Then flush whatever is left. */ - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) mod_delayed_work(pag->pag_mount->m_blockgc_wq, &pag->pag_blockgc_work, 0); - for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG))) flush_delayed_work(&pag->pag_blockgc_work); return xfs_inodegc_flush(mp); @@ -1752,12 +1786,11 @@ xfs_icwalk( enum xfs_icwalk_goal goal, struct xfs_icwalk *icw) { - struct xfs_perag *pag; + struct xfs_perag *pag = NULL; int error = 0; int last_error = 0; - xfs_agnumber_t agno; - for_each_perag_tag(mp, agno, pag, goal) { + while ((pag = xfs_perag_grab_next_tag(mp, pag, goal))) { error = xfs_icwalk_ag(pag, goal, icw); if (error) { last_error = error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7dc6f326936c..bcc277fc0a83 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -704,7 +704,7 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); + error = xfs_dialloc(&tp, args, &ino); if (!error) error = xfs_icreate(tp, ino, args, &du.ip); if (error) @@ -812,7 +812,7 @@ xfs_create_tmpfile( if (error) goto out_release_dquots; - error = xfs_dialloc(&tp, dp->i_ino, args->mode, &ino); + error = xfs_dialloc(&tp, args, &ino); if (!error) error = xfs_icreate(tp, ino, args, &ip); if (error) @@ -1079,88 +1079,6 @@ out: return error; } -int -xfs_release( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - int error = 0; - - if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0)) - return 0; - - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) - return 0; - - if (!xfs_is_shutdown(mp)) { - int truncated; - - /* - * If we previously truncated this file and removed old data - * in the process, we want to initiate "early" writeout on - * the last close. This is an attempt to combat the notorious - * NULL files problem which is particularly noticeable from a - * truncate down, buffered (re-)write (delalloc), followed by - * a crash. What we are effectively doing here is - * significantly reducing the time window where we'd otherwise - * be exposed to that problem. - */ - truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated) { - xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); - if (ip->i_delayed_blks > 0) { - error = filemap_flush(VFS_I(ip)->i_mapping); - if (error) - return error; - } - } - } - - if (VFS_I(ip)->i_nlink == 0) - return 0; - - /* - * If we can't get the iolock just skip truncating the blocks past EOF - * because we could deadlock with the mmap_lock otherwise. We'll get - * another chance to drop them once the last reference to the inode is - * dropped, so we'll never leak blocks permanently. - */ - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) - return 0; - - if (xfs_can_free_eofblocks(ip)) { - /* - * Check if the inode is being opened, written and closed - * frequently and we have delayed allocation blocks outstanding - * (e.g. streaming writes from the NFS server), truncating the - * blocks past EOF will cause fragmentation to occur. - * - * In this case don't do the truncation, but we have to be - * careful how we detect this case. Blocks beyond EOF show up as - * i_delayed_blks even when the inode is clean, so we need to - * truncate them away first before checking for a dirty release. - * Hence on the first dirty close we will still remove the - * speculative allocation, but after that we will leave it in - * place. - */ - if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE)) - goto out_unlock; - - error = xfs_free_eofblocks(ip); - if (error) - goto out_unlock; - - /* delalloc blocks after truncation means it really is dirty */ - if (ip->i_delayed_blks) - xfs_iflags_set(ip, XFS_IDIRTY_RELEASE); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; -} - /* * Mark all the buffers attached to this directory stale. In theory we should * never be freeing a directory with any blocks at all, but this covers the diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 51defdebef30..97ed912306fd 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -276,12 +276,13 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } -static inline bool xfs_is_metadata_inode(struct xfs_inode *ip) +static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; - return ip == mp->m_rbmip || ip == mp->m_rsumip || - xfs_is_quota_inode(&mp->m_sb, ip->i_ino); + return ip->i_ino == mp->m_sb.sb_rbmino || + ip->i_ino == mp->m_sb.sb_rsumino || + xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } bool xfs_is_always_cow_inode(struct xfs_inode *ip); @@ -335,7 +336,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) #define XFS_INEW (1 << 3) /* inode has just been allocated */ #define XFS_IPRESERVE_DM_FIELDS (1 << 4) /* has legacy DMAPI fields set */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ -#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ +#define XFS_EOFBLOCKS_RELEASED (1 << 6) /* eofblocks were freed in ->release */ #define XFS_IFLUSHING (1 << 7) /* inode is being flushed */ #define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */ #define XFS_IPINNED (1 << __XFS_IPINNED_BIT) @@ -382,7 +383,7 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ + XFS_EOFBLOCKS_RELEASED | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ XFS_INACTIVATING | XFS_IQUOTAUNCHECKED) /* @@ -512,7 +513,6 @@ enum layout_break_reason { #define XFS_INHERIT_GID(pip) \ (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID)) -int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6b13666d4e96..7226d27e8afc 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -876,136 +876,6 @@ out_free_buf: return error; } -STATIC int -xfs_ioc_getfsmap( - struct xfs_inode *ip, - struct fsmap_head __user *arg) -{ - struct xfs_fsmap_head xhead = {0}; - struct fsmap_head head; - struct fsmap *recs; - unsigned int count; - __u32 last_flags = 0; - bool done = false; - int error; - - if (copy_from_user(&head, arg, sizeof(struct fsmap_head))) - return -EFAULT; - if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) || - memchr_inv(head.fmh_keys[0].fmr_reserved, 0, - sizeof(head.fmh_keys[0].fmr_reserved)) || - memchr_inv(head.fmh_keys[1].fmr_reserved, 0, - sizeof(head.fmh_keys[1].fmr_reserved))) - return -EINVAL; - - /* - * Use an internal memory buffer so that we don't have to copy fsmap - * data to userspace while holding locks. Start by trying to allocate - * up to 128k for the buffer, but fall back to a single page if needed. - */ - count = min_t(unsigned int, head.fmh_count, - 131072 / sizeof(struct fsmap)); - recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); - if (!recs) { - count = min_t(unsigned int, head.fmh_count, - PAGE_SIZE / sizeof(struct fsmap)); - recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL); - if (!recs) - return -ENOMEM; - } - - xhead.fmh_iflags = head.fmh_iflags; - xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]); - xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]); - - trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); - trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]); - - head.fmh_entries = 0; - do { - struct fsmap __user *user_recs; - struct fsmap *last_rec; - - user_recs = &arg->fmh_recs[head.fmh_entries]; - xhead.fmh_entries = 0; - xhead.fmh_count = min_t(unsigned int, count, - head.fmh_count - head.fmh_entries); - - /* Run query, record how many entries we got. */ - error = xfs_getfsmap(ip->i_mount, &xhead, recs); - switch (error) { - case 0: - /* - * There are no more records in the result set. Copy - * whatever we got to userspace and break out. - */ - done = true; - break; - case -ECANCELED: - /* - * The internal memory buffer is full. Copy whatever - * records we got to userspace and go again if we have - * not yet filled the userspace buffer. - */ - error = 0; - break; - default: - goto out_free; - } - head.fmh_entries += xhead.fmh_entries; - head.fmh_oflags = xhead.fmh_oflags; - - /* - * If the caller wanted a record count or there aren't any - * new records to return, we're done. - */ - if (head.fmh_count == 0 || xhead.fmh_entries == 0) - break; - - /* Copy all the records we got out to userspace. */ - if (copy_to_user(user_recs, recs, - xhead.fmh_entries * sizeof(struct fsmap))) { - error = -EFAULT; - goto out_free; - } - - /* Remember the last record flags we copied to userspace. */ - last_rec = &recs[xhead.fmh_entries - 1]; - last_flags = last_rec->fmr_flags; - - /* Set up the low key for the next iteration. */ - xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec); - trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]); - } while (!done && head.fmh_entries < head.fmh_count); - - /* - * If there are no more records in the query result set and we're not - * in counting mode, mark the last record returned with the LAST flag. - */ - if (done && head.fmh_count > 0 && head.fmh_entries > 0) { - struct fsmap __user *user_rec; - - last_flags |= FMR_OF_LAST; - user_rec = &arg->fmh_recs[head.fmh_entries - 1]; - - if (copy_to_user(&user_rec->fmr_flags, &last_flags, - sizeof(last_flags))) { - error = -EFAULT; - goto out_free; - } - } - - /* copy back header */ - if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) { - error = -EFAULT; - goto out_free; - } - -out_free: - kvfree(recs); - return error; -} - int xfs_ioc_swapext( xfs_swapext_t *sxp) @@ -1518,6 +1388,10 @@ xfs_file_ioctl( case XFS_IOC_EXCHANGE_RANGE: return xfs_ioc_exchange_range(filp, arg); + case XFS_IOC_START_COMMIT: + return xfs_ioc_start_commit(filp, arg); + case XFS_IOC_COMMIT_RANGE: + return xfs_ioc_commit_range(filp, arg); default: return -ENOTTY; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 817ea7e0a8ab..26b2f5887b88 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3495,7 +3495,7 @@ xlog_force_shutdown( * If this log shutdown also sets the mount shutdown state, issue a * shutdown warning message. */ - if (!test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &log->l_mp->m_opstate)) { + if (!xfs_set_shutdown(log->l_mp)) { xfs_alert_tag(log->l_mp, XFS_PTAG_SHUTDOWN_LOGERROR, "Filesystem has been shut down due to log error (0x%x).", shutdown_flags); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4423dd344239..1a74fe22672e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1336,7 +1336,7 @@ xlog_find_tail( * headers if we have a filesystem using non-persistent counters. */ if (clean) - set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); + xfs_set_clean(log->l_mp); /* * Make sure that there are no blocks in front of the head diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 09eef1721ef4..460f93a9ce00 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -595,7 +595,7 @@ xfs_unmount_flush_inodes( xfs_extent_busy_wait_all(mp); flush_workqueue(xfs_discard_wq); - set_bit(XFS_OPSTATE_UNMOUNTING, &mp->m_opstate); + xfs_set_unmounting(mp); xfs_ail_push_all_sync(mp->m_ail); xfs_inodegc_stop(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d0567dfbc036..96496f39f551 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -147,7 +147,7 @@ typedef struct xfs_mount { int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ - uint m_rsumsize; /* size of rt summary, bytes */ + xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */ int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_qflags; /* quota status flags */ uint64_t m_features; /* active filesystem features */ @@ -208,8 +208,7 @@ typedef struct xfs_mount { */ atomic64_t m_allocbt_blks; - struct radix_tree_root m_perag_tree; /* per-ag accounting info */ - spinlock_t m_perag_lock; /* lock for m_perag_tree */ + struct xarray m_perags; /* per-ag accounting info */ uint64_t m_resblks; /* total reserved blocks */ uint64_t m_resblks_avail;/* available reserved blocks */ uint64_t m_resblks_save; /* reserved blks @ remount,ro */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 7443debaffd6..d0f5b403bdbe 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -230,9 +230,8 @@ _xfs_mru_cache_clear_reap_list( __releases(mru->lock) __acquires(mru->lock) { struct xfs_mru_cache_elem *elem, *next; - struct list_head tmp; + LIST_HEAD(tmp); - INIT_LIST_HEAD(&tmp); list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) { /* Remove the element from the data store. */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 9490b913a4ab..7e2307921deb 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -799,7 +799,7 @@ xfs_qm_qino_alloc( }; xfs_ino_t ino; - error = xfs_dialloc(&tp, 0, S_IFREG, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (!error) error = xfs_icreate(tp, ino, &args, ipp); if (error) { @@ -1539,6 +1539,43 @@ xfs_qm_mount_quotas( } /* + * Load the inode for a given type of quota, assuming that the sb fields have + * been sorted out. This is not true when switching quota types on a V4 + * filesystem, so do not use this function for that. + * + * Returns -ENOENT if the quota inode field is NULLFSINO; 0 and an inode on + * success; or a negative errno. + */ +int +xfs_qm_qino_load( + struct xfs_mount *mp, + xfs_dqtype_t type, + struct xfs_inode **ipp) +{ + xfs_ino_t ino = NULLFSINO; + + switch (type) { + case XFS_DQTYPE_USER: + ino = mp->m_sb.sb_uquotino; + break; + case XFS_DQTYPE_GROUP: + ino = mp->m_sb.sb_gquotino; + break; + case XFS_DQTYPE_PROJ: + ino = mp->m_sb.sb_pquotino; + break; + default: + ASSERT(0); + return -EFSCORRUPTED; + } + + if (ino == NULLFSINO) + return -ENOENT; + + return xfs_iget(mp, NULL, ino, 0, 0, ipp); +} + +/* * This is called after the superblock has been read in and we're ready to * iget the quota inodes. */ @@ -1561,24 +1598,21 @@ xfs_qm_init_quotainos( if (XFS_IS_UQUOTA_ON(mp) && mp->m_sb.sb_uquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_uquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_USER, &uip); if (error) return error; } if (XFS_IS_GQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_GROUP, &gip); if (error) goto error_rele; } if (XFS_IS_PQUOTA_ON(mp) && mp->m_sb.sb_pquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_pquotino > 0); - error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino, - 0, 0, &pip); + error = xfs_qm_qino_load(mp, XFS_DQTYPE_PROJ, &pip); if (error) goto error_rele; } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 6e09dfcd13e2..e919c7f62f57 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -184,4 +184,7 @@ xfs_get_defquota(struct xfs_quotainfo *qi, xfs_dqtype_t type) } } +int xfs_qm_qino_load(struct xfs_mount *mp, xfs_dqtype_t type, + struct xfs_inode **ipp); + #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 392cb39cc10c..4eda50ae2d1c 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -53,16 +53,15 @@ xfs_qm_scall_quotaoff( STATIC int xfs_qm_scall_trunc_qfile( struct xfs_mount *mp, - xfs_ino_t ino) + xfs_dqtype_t type) { struct xfs_inode *ip; struct xfs_trans *tp; int error; - if (ino == NULLFSINO) + error = xfs_qm_qino_load(mp, type, &ip); + if (error == -ENOENT) return 0; - - error = xfs_iget(mp, NULL, ino, 0, 0, &ip); if (error) return error; @@ -113,17 +112,17 @@ xfs_qm_scall_trunc_qfiles( } if (flags & XFS_QMOPT_UQUOTA) { - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_USER); if (error) return error; } if (flags & XFS_QMOPT_GQUOTA) { - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_GROUP); if (error) return error; } if (flags & XFS_QMOPT_PQUOTA) - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); + error = xfs_qm_scall_trunc_qfile(mp, XFS_DQTYPE_PROJ); return error; } diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 9c162e69976b..4c7f7ce4fd2f 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -16,24 +16,25 @@ #include "xfs_qm.h" -static void +static int xfs_qm_fill_state( struct qc_type_state *tstate, struct xfs_mount *mp, - struct xfs_inode *ip, - xfs_ino_t ino, - struct xfs_def_quota *defq) + xfs_dqtype_t type) { - bool tempqip = false; - - tstate->ino = ino; - if (!ip && ino == NULLFSINO) - return; - if (!ip) { - if (xfs_iget(mp, NULL, ino, 0, 0, &ip)) - return; - tempqip = true; + struct xfs_inode *ip; + struct xfs_def_quota *defq; + int error; + + error = xfs_qm_qino_load(mp, type, &ip); + if (error) { + tstate->ino = NULLFSINO; + return error != -ENOENT ? error : 0; } + + defq = xfs_get_defquota(mp->m_quotainfo, type); + + tstate->ino = ip->i_ino; tstate->flags |= QCI_SYSFILE; tstate->blocks = ip->i_nblocks; tstate->nextents = ip->i_df.if_nextents; @@ -43,8 +44,9 @@ xfs_qm_fill_state( tstate->spc_warnlimit = 0; tstate->ino_warnlimit = 0; tstate->rt_spc_warnlimit = 0; - if (tempqip) - xfs_irele(ip); + xfs_irele(ip); + + return 0; } /* @@ -56,8 +58,9 @@ xfs_fs_get_quota_state( struct super_block *sb, struct qc_state *state) { - struct xfs_mount *mp = XFS_M(sb); - struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_mount *mp = XFS_M(sb); + struct xfs_quotainfo *q = mp->m_quotainfo; + int error; memset(state, 0, sizeof(*state)); if (!XFS_IS_QUOTA_ON(mp)) @@ -76,12 +79,18 @@ xfs_fs_get_quota_state( if (XFS_IS_PQUOTA_ENFORCED(mp)) state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED; - xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip, - mp->m_sb.sb_uquotino, &q->qi_usr_default); - xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip, - mp->m_sb.sb_gquotino, &q->qi_grp_default); - xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip, - mp->m_sb.sb_pquotino, &q->qi_prj_default); + error = xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, + XFS_DQTYPE_USER); + if (error) + return error; + error = xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, + XFS_DQTYPE_GROUP); + if (error) + return error; + error = xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, + XFS_DQTYPE_PROJ); + if (error) + return error; return 0; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ebeab8e4dab1..3a2005a1e673 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -142,7 +142,7 @@ xfs_rtallocate_range( * We need to find the beginning and end of the extent so we can * properly update the summary. */ - error = xfs_rtfind_back(args, start, 0, &preblock); + error = xfs_rtfind_back(args, start, &preblock); if (error) return error; @@ -194,6 +194,17 @@ xfs_rtallocate_range( return xfs_rtmodify_range(args, start, len, 0); } +/* Reduce @rtxlen until it is a multiple of @prod. */ +static inline xfs_rtxlen_t +xfs_rtalloc_align_len( + xfs_rtxlen_t rtxlen, + xfs_rtxlen_t prod) +{ + if (unlikely(prod > 1)) + return rounddown(rtxlen, prod); + return rtxlen; +} + /* * Make sure we don't run off the end of the rt volume. Be careful that * adjusting maxlen downwards doesn't cause us to fail the alignment checks. @@ -208,7 +219,7 @@ xfs_rtallocate_clamp_len( xfs_rtxlen_t ret; ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; - return rounddown(ret, prod); + return xfs_rtalloc_align_len(ret, prod); } /* @@ -229,39 +240,40 @@ xfs_rtallocate_extent_block( xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { struct xfs_mount *mp = args->mp; - xfs_rtxnum_t besti; /* best rtext found so far */ - xfs_rtxnum_t bestlen;/* best length found so far */ + xfs_rtxnum_t besti = -1; /* best rtext found so far */ xfs_rtxnum_t end; /* last rtext in chunk */ - int error; xfs_rtxnum_t i; /* current rtext trying */ xfs_rtxnum_t next; /* next rtext to try */ + xfs_rtxlen_t scanlen; /* number of free rtx to look for */ + xfs_rtxlen_t bestlen = 0; /* best length found so far */ int stat; /* status from internal calls */ + int error; /* - * Loop over all the extents starting in this bitmap block, - * looking for one that's long enough. + * Loop over all the extents starting in this bitmap block up to the + * end of the rt volume, looking for one that's long enough. */ - for (i = xfs_rbmblock_to_rtx(mp, bbno), besti = -1, bestlen = 0, - end = xfs_rbmblock_to_rtx(mp, bbno + 1) - 1; - i <= end; - i++) { + end = min(mp->m_sb.sb_rextents, xfs_rbmblock_to_rtx(mp, bbno + 1)) - 1; + for (i = xfs_rbmblock_to_rtx(mp, bbno); i <= end; i++) { /* Make sure we don't scan off the end of the rt volume. */ - maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + scanlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + if (scanlen < minlen) + break; /* - * See if there's a free extent of maxlen starting at i. + * See if there's a free extent of scanlen starting at i. * If it's not so then next will contain the first non-free. */ - error = xfs_rtcheck_range(args, i, maxlen, 1, &next, &stat); + error = xfs_rtcheck_range(args, i, scanlen, 1, &next, &stat); if (error) return error; if (stat) { /* - * i for maxlen is all free, allocate and return that. + * i to scanlen is all free, allocate and return that. */ - bestlen = maxlen; - besti = i; - goto allocate; + *len = scanlen; + *rtx = i; + return 0; } /* @@ -289,38 +301,28 @@ xfs_rtallocate_extent_block( return error; } - /* - * Searched the whole thing & didn't find a maxlen free extent. - */ - if (minlen > maxlen || besti == -1) { - /* - * Allocation failed. Set *nextp to the next block to try. - */ - *nextp = next; - return -ENOSPC; - } + /* Searched the whole thing & didn't find a maxlen free extent. */ + if (besti == -1) + goto nospace; /* - * If size should be a multiple of prod, make that so. + * Ensure bestlen is a multiple of prod, but don't return a too-short + * extent. */ - if (prod > 1) { - xfs_rtxlen_t p; /* amount to trim length by */ - - div_u64_rem(bestlen, prod, &p); - if (p) - bestlen -= p; - } + bestlen = xfs_rtalloc_align_len(bestlen, prod); + if (bestlen < minlen) + goto nospace; /* - * Allocate besti for bestlen & return that. + * Pick besti for bestlen & return that. */ -allocate: - error = xfs_rtallocate_range(args, besti, bestlen); - if (error) - return error; *len = bestlen; *rtx = besti; return 0; +nospace: + /* Allocation failed. Set *nextp to the next block to try. */ + *nextp = next; + return -ENOSPC; } /* @@ -339,45 +341,46 @@ xfs_rtallocate_extent_exact( xfs_rtxlen_t prod, /* extent product factor */ xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { - int error; - xfs_rtxlen_t i; /* extent length trimmed due to prod */ - int isfree; /* extent is free */ + struct xfs_mount *mp = args->mp; xfs_rtxnum_t next; /* next rtext to try (dummy) */ + xfs_rtxlen_t alloclen; /* candidate length */ + xfs_rtxlen_t scanlen; /* number of free rtx to look for */ + int isfree; /* extent is free */ + int error; ASSERT(minlen % prod == 0); ASSERT(maxlen % prod == 0); - /* - * Check if the range in question (for maxlen) is free. - */ - error = xfs_rtcheck_range(args, start, maxlen, 1, &next, &isfree); + + /* Make sure we don't run off the end of the rt volume. */ + scanlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); + if (scanlen < minlen) + return -ENOSPC; + + /* Check if the range in question (for scanlen) is free. */ + error = xfs_rtcheck_range(args, start, scanlen, 1, &next, &isfree); if (error) return error; - if (!isfree) { - /* - * If not, allocate what there is, if it's at least minlen. - */ - maxlen = next - start; - if (maxlen < minlen) - return -ENOSPC; - - /* - * Trim off tail of extent, if prod is specified. - */ - if (prod > 1 && (i = maxlen % prod)) { - maxlen -= i; - if (maxlen < minlen) - return -ENOSPC; - } + if (isfree) { + /* start to scanlen is all free; allocate it. */ + *len = scanlen; + *rtx = start; + return 0; } /* - * Allocate what we can and return it. + * If not, allocate what there is, if it's at least minlen. */ - error = xfs_rtallocate_range(args, start, maxlen); - if (error) - return error; - *len = maxlen; + alloclen = next - start; + if (alloclen < minlen) + return -ENOSPC; + + /* Ensure alloclen is a multiple of prod. */ + alloclen = xfs_rtalloc_align_len(alloclen, prod); + if (alloclen < minlen) + return -ENOSPC; + + *len = alloclen; *rtx = start; return 0; } @@ -416,11 +419,6 @@ xfs_rtallocate_extent_near( if (start >= mp->m_sb.sb_rextents) start = mp->m_sb.sb_rextents - 1; - /* Make sure we don't run off the end of the rt volume. */ - maxlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod); - if (maxlen < minlen) - return -ENOSPC; - /* * Try the exact allocation first. */ @@ -429,7 +427,6 @@ xfs_rtallocate_extent_near( if (error != -ENOSPC) return error; - bbno = xfs_rtx_to_rbmblock(mp, start); i = 0; j = -1; @@ -552,11 +549,11 @@ xfs_rtalloc_sumlevel( xfs_rtxnum_t *rtx) /* out: start rtext allocated */ { xfs_fileoff_t i; /* bitmap block number */ + int error; for (i = 0; i < args->mp->m_sb.sb_rbmblocks; i++) { xfs_suminfo_t sum; /* summary information for extents */ xfs_rtxnum_t n; /* next rtext to be tried */ - int error; error = xfs_rtget_summary(args, l, i, &sum); if (error) @@ -652,136 +649,20 @@ xfs_rtallocate_extent_size( return -ENOSPC; } -/* - * Allocate space to the bitmap or summary file, and zero it, for growfs. - */ -STATIC int -xfs_growfs_rt_alloc( - struct xfs_mount *mp, /* file system mount point */ - xfs_extlen_t oblocks, /* old count of blocks */ - xfs_extlen_t nblocks, /* new count of blocks */ - struct xfs_inode *ip) /* inode (bitmap/summary) */ -{ - xfs_fileoff_t bno; /* block number in file */ - struct xfs_buf *bp; /* temporary buffer for zeroing */ - xfs_daddr_t d; /* disk block address */ - int error; /* error return value */ - xfs_fsblock_t fsbno; /* filesystem block for bno */ - struct xfs_bmbt_irec map; /* block map output */ - int nmap; /* number of block maps */ - int resblks; /* space reservation */ - enum xfs_blft buf_type; - struct xfs_trans *tp; - - if (ip == mp->m_rsumip) - buf_type = XFS_BLFT_RTSUMMARY_BUF; - else - buf_type = XFS_BLFT_RTBITMAP_BUF; - - /* - * Allocate space to the file, as necessary. - */ - while (oblocks < nblocks) { - resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); - /* - * Reserve space & log for one extent added to the file. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks, - 0, 0, &tp); - if (error) - return error; - /* - * Lock the inode. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, - XFS_IEXT_ADD_NOSPLIT_CNT); - if (error) - goto out_trans_cancel; - - /* - * Allocate blocks to the bitmap file. - */ - nmap = 1; - error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, 0, &map, &nmap); - if (error) - goto out_trans_cancel; - /* - * Free any blocks freed up in the transaction, then commit. - */ - error = xfs_trans_commit(tp); - if (error) - return error; - /* - * Now we need to clear the allocated blocks. - * Do this one block per transaction, to keep it simple. - */ - for (bno = map.br_startoff, fsbno = map.br_startblock; - bno < map.br_startoff + map.br_blockcount; - bno++, fsbno++) { - /* - * Reserve log for one block zeroing. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, - 0, 0, 0, &tp); - if (error) - return error; - /* - * Lock the bitmap inode. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - /* - * Get a buffer for the block. - */ - d = XFS_FSB_TO_DADDR(mp, fsbno); - error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize, 0, &bp); - if (error) - goto out_trans_cancel; - - xfs_trans_buf_set_type(tp, bp, buf_type); - bp->b_ops = &xfs_rtbuf_ops; - memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); - xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); - /* - * Commit the transaction. - */ - error = xfs_trans_commit(tp); - if (error) - return error; - } - /* - * Go on to the next extent, if any. - */ - oblocks = map.br_startoff + map.br_blockcount; - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp); - return error; -} - -static void +static int xfs_alloc_rsum_cache( - xfs_mount_t *mp, /* file system mount structure */ - xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */ + struct xfs_mount *mp, + xfs_extlen_t rbmblocks) { /* * The rsum cache is initialized to the maximum value, which is * trivially an upper bound on the maximum level with any free extents. - * We can continue without the cache if it couldn't be allocated. */ mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL); - if (mp->m_rsum_cache) - memset(mp->m_rsum_cache, -1, rbmblocks); - else - xfs_warn(mp, "could not allocate realtime summary cache"); + if (!mp->m_rsum_cache) + return -ENOMEM; + memset(mp->m_rsum_cache, -1, rbmblocks); + return 0; } /* @@ -817,9 +698,168 @@ out_iolock: return error; } +static int +xfs_growfs_rt_bmblock( + struct xfs_mount *mp, + xfs_rfsblock_t nrblocks, + xfs_agblock_t rextsize, + xfs_fileoff_t bmbno) +{ + struct xfs_inode *rbmip = mp->m_rbmip; + struct xfs_inode *rsumip = mp->m_rsumip; + struct xfs_rtalloc_args args = { + .mp = mp, + }; + struct xfs_rtalloc_args nargs = { + }; + struct xfs_mount *nmp; + xfs_rfsblock_t nrblocks_step; + xfs_rtbxlen_t freed_rtx; + int error; + + + nrblocks_step = (bmbno + 1) * NBBY * mp->m_sb.sb_blocksize * rextsize; + + nmp = nargs.mp = kmemdup(mp, sizeof(*mp), GFP_KERNEL); + if (!nmp) + return -ENOMEM; + + /* + * Calculate new sb and mount fields for this round. + */ + nmp->m_sb.sb_rextsize = rextsize; + xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb); + nmp->m_sb.sb_rbmblocks = bmbno + 1; + nmp->m_sb.sb_rblocks = min(nrblocks, nrblocks_step); + nmp->m_sb.sb_rextents = xfs_rtb_to_rtx(nmp, nmp->m_sb.sb_rblocks); + nmp->m_sb.sb_rextslog = xfs_compute_rextslog(nmp->m_sb.sb_rextents); + nmp->m_rsumlevels = nmp->m_sb.sb_rextslog + 1; + nmp->m_rsumblocks = xfs_rtsummary_blockcount(mp, nmp->m_rsumlevels, + nmp->m_sb.sb_rbmblocks); + + /* + * Recompute the growfsrt reservation from the new rsumsize, so that the + * transaction below use the new, potentially larger value. + * */ + xfs_trans_resv_calc(nmp, &nmp->m_resv); + error = xfs_trans_alloc(mp, &M_RES(nmp)->tr_growrtfree, 0, 0, 0, + &args.tp); + if (error) + goto out_free; + nargs.tp = args.tp; + + xfs_rtbitmap_lock(mp); + xfs_rtbitmap_trans_join(args.tp); + + /* + * Update the bitmap inode's size ondisk and incore. We need to update + * the incore size so that inode inactivation won't punch what it thinks + * are "posteof" blocks. + */ + rbmip->i_disk_size = nmp->m_sb.sb_rbmblocks * nmp->m_sb.sb_blocksize; + i_size_write(VFS_I(rbmip), rbmip->i_disk_size); + xfs_trans_log_inode(args.tp, rbmip, XFS_ILOG_CORE); + + /* + * Update the summary inode's size. We need to update the incore size + * so that inode inactivation won't punch what it thinks are "posteof" + * blocks. + */ + rsumip->i_disk_size = nmp->m_rsumblocks * nmp->m_sb.sb_blocksize; + i_size_write(VFS_I(rsumip), rsumip->i_disk_size); + xfs_trans_log_inode(args.tp, rsumip, XFS_ILOG_CORE); + + /* + * Copy summary data from old to new sizes when the real size (not + * block-aligned) changes. + */ + if (mp->m_sb.sb_rbmblocks != nmp->m_sb.sb_rbmblocks || + mp->m_rsumlevels != nmp->m_rsumlevels) { + error = xfs_rtcopy_summary(&args, &nargs); + if (error) + goto out_cancel; + } + + /* + * Update superblock fields. + */ + if (nmp->m_sb.sb_rextsize != mp->m_sb.sb_rextsize) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSIZE, + nmp->m_sb.sb_rextsize - mp->m_sb.sb_rextsize); + if (nmp->m_sb.sb_rbmblocks != mp->m_sb.sb_rbmblocks) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RBMBLOCKS, + nmp->m_sb.sb_rbmblocks - mp->m_sb.sb_rbmblocks); + if (nmp->m_sb.sb_rblocks != mp->m_sb.sb_rblocks) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RBLOCKS, + nmp->m_sb.sb_rblocks - mp->m_sb.sb_rblocks); + if (nmp->m_sb.sb_rextents != mp->m_sb.sb_rextents) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTENTS, + nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents); + if (nmp->m_sb.sb_rextslog != mp->m_sb.sb_rextslog) + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSLOG, + nmp->m_sb.sb_rextslog - mp->m_sb.sb_rextslog); + + /* + * Free the new extent. + */ + freed_rtx = nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents; + error = xfs_rtfree_range(&nargs, mp->m_sb.sb_rextents, freed_rtx); + xfs_rtbuf_cache_relse(&nargs); + if (error) + goto out_cancel; + + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_FREXTENTS, freed_rtx); + + /* + * Update the calculated values in the real mount structure. + */ + mp->m_rsumlevels = nmp->m_rsumlevels; + mp->m_rsumblocks = nmp->m_rsumblocks; + xfs_mount_sb_set_rextsize(mp, &mp->m_sb); + + /* + * Recompute the growfsrt reservation from the new rsumsize. + */ + xfs_trans_resv_calc(mp, &mp->m_resv); + + error = xfs_trans_commit(args.tp); + if (error) + goto out_free; + + /* + * Ensure the mount RT feature flag is now set. + */ + mp->m_features |= XFS_FEAT_REALTIME; + + kfree(nmp); + return 0; + +out_cancel: + xfs_trans_cancel(args.tp); +out_free: + kfree(nmp); + return error; +} + /* - * Visible (exported) functions. + * Calculate the last rbmblock currently used. + * + * This also deals with the case where there were no rtextents before. */ +static xfs_fileoff_t +xfs_last_rt_bmblock( + struct xfs_mount *mp) +{ + xfs_fileoff_t bmbno = mp->m_sb.sb_rbmblocks; + + /* Skip the current block if it is exactly full. */ + if (xfs_rtx_to_rbmword(mp, mp->m_sb.sb_rextents) != 0) + bmbno--; + return bmbno; +} /* * Grow the realtime area of the filesystem. @@ -832,23 +872,14 @@ xfs_growfs_rt( xfs_fileoff_t bmbno; /* bitmap block number */ struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ - xfs_mount_t *nmp; /* new (fake) mount structure */ - xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ xfs_rtxnum_t nrextents; /* new number of realtime extents */ - uint8_t nrextslog; /* new log2 of sb_rextents */ xfs_extlen_t nrsumblocks; /* new number of summary blocks */ - uint nrsumlevels; /* new rt summary levels */ - uint nrsumsize; /* new size of rt summary, bytes */ - xfs_sb_t *nsbp; /* new superblock */ xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */ xfs_extlen_t rsumblocks; /* current number of rt summary blks */ - xfs_sb_t *sbp; /* old superblock */ uint8_t *rsum_cache; /* old summary cache */ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize; - sbp = &mp->m_sb; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -867,11 +898,10 @@ xfs_growfs_rt( goto out_unlock; /* Shrink not supported. */ - if (in->newblocks <= sbp->sb_rblocks) + if (in->newblocks <= mp->m_sb.sb_rblocks) goto out_unlock; - /* Can only change rt extent size when adding rt volume. */ - if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) + if (mp->m_sb.sb_rblocks > 0 && in->extsize != mp->m_sb.sb_rextsize) goto out_unlock; /* Range check the extent size. */ @@ -884,15 +914,14 @@ xfs_growfs_rt( if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) goto out_unlock; - nrblocks = in->newblocks; - error = xfs_sb_validate_fsb_count(sbp, nrblocks); + error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks); if (error) goto out_unlock; /* * Read in the last block of the device, make sure it exists. */ error = xfs_buf_read_uncached(mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, nrblocks - 1), + XFS_FSB_TO_BB(mp, in->newblocks - 1), XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); if (error) goto out_unlock; @@ -901,17 +930,15 @@ xfs_growfs_rt( /* * Calculate new parameters. These are the final values to be reached. */ - nrextents = nrblocks; - do_div(nrextents, in->extsize); - if (!xfs_validate_rtextents(nrextents)) { + nrextents = div_u64(in->newblocks, in->extsize); + if (nrextents == 0) { error = -EINVAL; goto out_unlock; } nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents); - nrextslog = xfs_compute_rextslog(nrextents); - nrsumlevels = nrextslog + 1; - nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks); - nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); + nrsumblocks = xfs_rtsummary_blockcount(mp, + xfs_compute_rextslog(nrextents) + 1, nrbmblocks); + /* * New summary size can't be more than half the size of * the log. This prevents us from getting a log overflow, @@ -931,154 +958,29 @@ xfs_growfs_rt( /* * Allocate space to the bitmap and summary files, as necessary. */ - error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); + error = xfs_rtfile_initialize_blocks(mp->m_rbmip, rbmblocks, + nrbmblocks, NULL); if (error) goto out_unlock; - error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); + error = xfs_rtfile_initialize_blocks(mp->m_rsumip, rsumblocks, + nrsumblocks, NULL); if (error) goto out_unlock; rsum_cache = mp->m_rsum_cache; - if (nrbmblocks != sbp->sb_rbmblocks) - xfs_alloc_rsum_cache(mp, nrbmblocks); - - /* - * Allocate a new (fake) mount/sb. - */ - nmp = kmalloc(sizeof(*nmp), GFP_KERNEL | __GFP_NOFAIL); - /* - * Loop over the bitmap blocks. - * We will do everything one bitmap block at a time. - * Skip the current block if it is exactly full. - * This also deals with the case where there were no rtextents before. - */ - for (bmbno = sbp->sb_rbmblocks - - ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); - bmbno < nrbmblocks; - bmbno++) { - struct xfs_rtalloc_args args = { - .mp = mp, - }; - struct xfs_rtalloc_args nargs = { - .mp = nmp, - }; - struct xfs_trans *tp; - xfs_rfsblock_t nrblocks_step; - - *nmp = *mp; - nsbp = &nmp->m_sb; - /* - * Calculate new sb and mount fields for this round. - */ - nsbp->sb_rextsize = in->extsize; - nmp->m_rtxblklog = -1; /* don't use shift or masking */ - nsbp->sb_rbmblocks = bmbno + 1; - nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize * - nsbp->sb_rextsize; - nsbp->sb_rblocks = min(nrblocks, nrblocks_step); - nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks); - ASSERT(nsbp->sb_rextents != 0); - nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents); - nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; - nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, - nsbp->sb_rbmblocks); - nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); - /* recompute growfsrt reservation from new rsumsize */ - xfs_trans_resv_calc(nmp, &nmp->m_resv); - - /* - * Start a transaction, get the log reservation. - */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0, - &tp); + if (nrbmblocks != mp->m_sb.sb_rbmblocks) { + error = xfs_alloc_rsum_cache(mp, nrbmblocks); if (error) - break; - args.tp = tp; - nargs.tp = tp; - - /* - * Lock out other callers by grabbing the bitmap and summary - * inode locks and joining them to the transaction. - */ - xfs_rtbitmap_lock(tp, mp); - /* - * Update the bitmap inode's size ondisk and incore. We need - * to update the incore size so that inode inactivation won't - * punch what it thinks are "posteof" blocks. - */ - mp->m_rbmip->i_disk_size = - nsbp->sb_rbmblocks * nsbp->sb_blocksize; - i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_disk_size); - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - /* - * Update the summary inode's size. We need to update the - * incore size so that inode inactivation won't punch what it - * thinks are "posteof" blocks. - */ - mp->m_rsumip->i_disk_size = nmp->m_rsumsize; - i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_disk_size); - xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); - /* - * Copy summary data from old to new sizes. - * Do this when the real size (not block-aligned) changes. - */ - if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks || - mp->m_rsumlevels != nmp->m_rsumlevels) { - error = xfs_rtcopy_summary(&args, &nargs); - if (error) - goto error_cancel; - } - /* - * Update superblock fields. - */ - if (nsbp->sb_rextsize != sbp->sb_rextsize) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSIZE, - nsbp->sb_rextsize - sbp->sb_rextsize); - if (nsbp->sb_rbmblocks != sbp->sb_rbmblocks) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS, - nsbp->sb_rbmblocks - sbp->sb_rbmblocks); - if (nsbp->sb_rblocks != sbp->sb_rblocks) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBLOCKS, - nsbp->sb_rblocks - sbp->sb_rblocks); - if (nsbp->sb_rextents != sbp->sb_rextents) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTENTS, - nsbp->sb_rextents - sbp->sb_rextents); - if (nsbp->sb_rextslog != sbp->sb_rextslog) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG, - nsbp->sb_rextslog - sbp->sb_rextslog); - /* - * Free new extent. - */ - error = xfs_rtfree_range(&nargs, sbp->sb_rextents, - nsbp->sb_rextents - sbp->sb_rextents); - xfs_rtbuf_cache_relse(&nargs); - if (error) { -error_cancel: - xfs_trans_cancel(tp); - break; - } - /* - * Mark more blocks free in the superblock. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, - nsbp->sb_rextents - sbp->sb_rextents); - /* - * Update mp values into the real mp structure. - */ - mp->m_rsumlevels = nrsumlevels; - mp->m_rsumsize = nrsumsize; - /* recompute growfsrt reservation from new rsumsize */ - xfs_trans_resv_calc(mp, &mp->m_resv); + goto out_unlock; + } - error = xfs_trans_commit(tp); + /* Initialize the free space bitmap one bitmap block at a time. */ + for (bmbno = xfs_last_rt_bmblock(mp); bmbno < nrbmblocks; bmbno++) { + error = xfs_growfs_rt_bmblock(mp, in->newblocks, in->extsize, + bmbno); if (error) - break; - - /* Ensure the mount RT feature flag is now set. */ - mp->m_features |= XFS_FEAT_REALTIME; + goto out_free; } - if (error) - goto out_free; if (old_rextsize != in->extsize) { error = xfs_growfs_rt_fixup_extsize(mp); @@ -1091,11 +993,6 @@ error_cancel: out_free: /* - * Free the fake mp structure. - */ - kfree(nmp); - - /* * If we had to allocate a new rsum_cache, we either need to free the * old one (if we succeeded) or free the new one and restore the old one * (if there was an error). @@ -1124,7 +1021,6 @@ xfs_rtmount_init( struct xfs_buf *bp; /* buffer for last block of subvolume */ struct xfs_sb *sbp; /* filesystem superblock copy in mount */ xfs_daddr_t d; /* address of last block of subvolume */ - unsigned int rsumblocks; int error; sbp = &mp->m_sb; @@ -1136,9 +1032,8 @@ xfs_rtmount_init( return -ENODEV; } mp->m_rsumlevels = sbp->sb_rextslog + 1; - rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, + mp->m_rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels, mp->m_sb.sb_rbmblocks); - mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks); mp->m_rbmip = mp->m_rsumip = NULL; /* * Check that the realtime section is an ok size. @@ -1268,7 +1163,9 @@ xfs_rtmount_inodes( if (error) goto out_rele_summary; - xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); + error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); + if (error) + goto out_rele_summary; return 0; out_rele_summary: @@ -1296,12 +1193,11 @@ xfs_rtunmount_inodes( * of rtextents and the fraction. * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ... */ -static int +static xfs_rtxnum_t xfs_rtpick_extent( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_rtxlen_t len, /* allocation length (rtextents) */ - xfs_rtxnum_t *pick) /* result rt extent */ + xfs_rtxlen_t len) /* allocation length (rtextents) */ { xfs_rtxnum_t b; /* result rtext */ int log2; /* log of sequence number */ @@ -1332,8 +1228,7 @@ xfs_rtpick_extent( ts.tv_sec = seq + 1; inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - *pick = b; - return 0; + return b; } static void @@ -1365,36 +1260,109 @@ xfs_rtalloc_align_minmax( *raminlen = newminlen; } -int -xfs_bmap_rtalloc( - struct xfs_bmalloca *ap) +static int +xfs_rtallocate( + struct xfs_trans *tp, + xfs_rtblock_t bno_hint, + xfs_rtxlen_t minlen, + xfs_rtxlen_t maxlen, + xfs_rtxlen_t prod, + bool wasdel, + bool initial_user_data, + bool *rtlocked, + xfs_rtblock_t *bno, + xfs_extlen_t *blen) +{ + struct xfs_rtalloc_args args = { + .mp = tp->t_mountp, + .tp = tp, + }; + xfs_rtxnum_t start = 0; + xfs_rtxnum_t rtx; + xfs_rtxlen_t len = 0; + int error = 0; + + /* + * Lock out modifications to both the RT bitmap and summary inodes. + */ + if (!*rtlocked) { + xfs_rtbitmap_lock(args.mp); + xfs_rtbitmap_trans_join(tp); + *rtlocked = true; + } + + /* + * For an allocation to an empty file at offset 0, pick an extent that + * will space things out in the rt area. + */ + if (bno_hint) + start = xfs_rtb_to_rtx(args.mp, bno_hint); + else if (initial_user_data) + start = xfs_rtpick_extent(args.mp, tp, maxlen); + + if (start) { + error = xfs_rtallocate_extent_near(&args, start, minlen, maxlen, + &len, prod, &rtx); + /* + * If we can't allocate near a specific rt extent, try again + * without locality criteria. + */ + if (error == -ENOSPC) { + xfs_rtbuf_cache_relse(&args); + error = 0; + } + } + + if (!error) { + error = xfs_rtallocate_extent_size(&args, minlen, maxlen, &len, + prod, &rtx); + } + + if (error) + goto out_release; + + error = xfs_rtallocate_range(&args, rtx, len); + if (error) + goto out_release; + + xfs_trans_mod_sb(tp, wasdel ? + XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS, + -(long)len); + *bno = xfs_rtx_to_rtb(args.mp, rtx); + *blen = xfs_rtxlen_to_extlen(args.mp, len); + +out_release: + xfs_rtbuf_cache_relse(&args); + return error; +} + +static int +xfs_rtallocate_align( + struct xfs_bmalloca *ap, + xfs_rtxlen_t *ralen, + xfs_rtxlen_t *raminlen, + xfs_rtxlen_t *prod, + bool *noalign) { struct xfs_mount *mp = ap->ip->i_mount; xfs_fileoff_t orig_offset = ap->offset; - xfs_rtxnum_t start; /* allocation hint rtextent no */ - xfs_rtxnum_t rtx; /* actually allocated rtextent no */ - xfs_rtxlen_t prod = 0; /* product factor for allocators */ - xfs_extlen_t mod = 0; /* product factor for allocators */ - xfs_rtxlen_t ralen = 0; /* realtime allocation length */ - xfs_extlen_t align; /* minimum allocation alignment */ - xfs_extlen_t orig_length = ap->length; xfs_extlen_t minlen = mp->m_sb.sb_rextsize; - xfs_rtxlen_t raminlen; - bool rtlocked = false; - bool ignore_locality = false; - struct xfs_rtalloc_args args = { - .mp = mp, - .tp = ap->tp, - }; + xfs_extlen_t align; /* minimum allocation alignment */ + xfs_extlen_t mod; /* product factor for allocators */ int error; - align = xfs_get_extsz_hint(ap->ip); - if (!align) - align = 1; -retry: - error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, - align, 1, ap->eof, 0, - ap->conv, &ap->offset, &ap->length); + if (*noalign) { + align = mp->m_sb.sb_rextsize; + } else { + align = xfs_get_extsz_hint(ap->ip); + if (!align) + align = 1; + if (align == mp->m_sb.sb_rextsize) + *noalign = true; + } + + error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 1, + ap->eof, 0, ap->conv, &ap->offset, &ap->length); if (error) return error; ASSERT(ap->length); @@ -1418,59 +1386,55 @@ retry: * XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't * adjust the starting point to match it. */ - ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); - raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); - ASSERT(raminlen > 0); - ASSERT(raminlen <= ralen); - - /* - * Lock out modifications to both the RT bitmap and summary inodes - */ - if (!rtlocked) { - xfs_rtbitmap_lock(ap->tp, mp); - rtlocked = true; - } - - if (ignore_locality) { - start = 0; - } else if (xfs_bmap_adjacent(ap)) { - start = xfs_rtb_to_rtx(mp, ap->blkno); - } else if (ap->datatype & XFS_ALLOC_INITIAL_USER_DATA) { - /* - * If it's an allocation to an empty file at offset 0, pick an - * extent that will space things out in the rt area. - */ - error = xfs_rtpick_extent(mp, ap->tp, ralen, &start); - if (error) - return error; - } else { - start = 0; - } + *ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN)); + *raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen)); + ASSERT(*raminlen > 0); + ASSERT(*raminlen <= *ralen); /* * Only bother calculating a real prod factor if offset & length are * perfectly aligned, otherwise it will just get us in trouble. */ div_u64_rem(ap->offset, align, &mod); - if (mod || ap->length % align) { - prod = 1; - } else { - prod = xfs_extlen_to_rtxlen(mp, align); - if (prod > 1) - xfs_rtalloc_align_minmax(&raminlen, &ralen, &prod); - } + if (mod || ap->length % align) + *prod = 1; + else + *prod = xfs_extlen_to_rtxlen(mp, align); - if (start) { - error = xfs_rtallocate_extent_near(&args, start, raminlen, - ralen, &ralen, prod, &rtx); - } else { - error = xfs_rtallocate_extent_size(&args, raminlen, - ralen, &ralen, prod, &rtx); - } - xfs_rtbuf_cache_relse(&args); + if (*prod > 1) + xfs_rtalloc_align_minmax(raminlen, ralen, prod); + return 0; +} +int +xfs_bmap_rtalloc( + struct xfs_bmalloca *ap) +{ + xfs_fileoff_t orig_offset = ap->offset; + xfs_rtxlen_t prod = 0; /* product factor for allocators */ + xfs_rtxlen_t ralen = 0; /* realtime allocation length */ + xfs_rtblock_t bno_hint = NULLRTBLOCK; + xfs_extlen_t orig_length = ap->length; + xfs_rtxlen_t raminlen; + bool rtlocked = false; + bool noalign = false; + bool initial_user_data = + ap->datatype & XFS_ALLOC_INITIAL_USER_DATA; + int error; + +retry: + error = xfs_rtallocate_align(ap, &ralen, &raminlen, &prod, &noalign); + if (error) + return error; + + if (xfs_bmap_adjacent(ap)) + bno_hint = ap->blkno; + + error = xfs_rtallocate(ap->tp, bno_hint, raminlen, ralen, prod, + ap->wasdel, initial_user_data, &rtlocked, + &ap->blkno, &ap->length); if (error == -ENOSPC) { - if (align > mp->m_sb.sb_rextsize) { + if (!noalign) { /* * We previously enlarged the request length to try to * satisfy an extent size hint. The allocator didn't @@ -1480,16 +1444,7 @@ retry: */ ap->offset = orig_offset; ap->length = orig_length; - minlen = align = mp->m_sb.sb_rextsize; - goto retry; - } - - if (!ignore_locality && start != 0) { - /* - * If we can't allocate near a specific rt extent, try - * again without locality criteria. - */ - ignore_locality = true; + noalign = true; goto retry; } @@ -1500,11 +1455,6 @@ retry: if (error) return error; - xfs_trans_mod_sb(ap->tp, ap->wasdel ? - XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS, - -(long)ralen); - ap->blkno = xfs_rtx_to_rtb(mp, rtx); - ap->length = xfs_rtxlen_to_extlen(mp, ralen); xfs_bmap_alloc_account(ap); return 0; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 27e9f749c4c7..26767745d9fd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -311,9 +311,9 @@ xfs_set_inode_alloc( * the allocator to accommodate the request. */ if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) - set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); + xfs_set_inode32(mp); else - clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); + xfs_clear_inode32(mp); for (index = 0; index < agcount; index++) { struct xfs_perag *pag; @@ -1511,7 +1511,7 @@ xfs_fs_fill_super( * the newer fsopen/fsconfig API. */ if (fc->sb_flags & SB_RDONLY) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_set_readonly(mp); if (fc->sb_flags & SB_DIRSYNC) mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS) @@ -1820,7 +1820,7 @@ xfs_remount_rw( return -EINVAL; } - clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_clear_readonly(mp); /* * If this is the first remount to writeable state we might have some @@ -1908,7 +1908,7 @@ xfs_remount_ro( xfs_save_resvblks(mp); xfs_log_clean(mp); - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); + xfs_set_readonly(mp); return 0; } @@ -2009,8 +2009,7 @@ static int xfs_init_fs_context( return -ENOMEM; spin_lock_init(&mp->m_sb_lock); - INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); - spin_lock_init(&mp->m_perag_lock); + xa_init(&mp->m_perags); mutex_init(&mp->m_growlock); INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 77f19e2f66e0..4252b07cd251 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -165,7 +165,7 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); + error = xfs_dialloc(&tp, &args, &ino); if (!error) error = xfs_icreate(tp, ino, &args, &du.ip); if (error) diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 180ce697305a..ee9f0b1f548d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -210,14 +210,14 @@ DEFINE_EVENT(xfs_perag_class, name, \ TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \ TP_ARGS(pag, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_hold); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_grab); -DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_rele); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); +DEFINE_PERAG_REF_EVENT(xfs_reclaim_inodes_count); TRACE_EVENT(xfs_inodegc_worker, TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits), @@ -4926,7 +4926,8 @@ DEFINE_INODE_ERROR_EVENT(xfs_exchrange_error); { XFS_EXCHANGE_RANGE_DRY_RUN, "DRY_RUN" }, \ { XFS_EXCHANGE_RANGE_FILE1_WRITTEN, "F1_WRITTEN" }, \ { __XFS_EXCHANGE_RANGE_UPD_CMTIME1, "CMTIME1" }, \ - { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" } + { __XFS_EXCHANGE_RANGE_UPD_CMTIME2, "CMTIME2" }, \ + { __XFS_EXCHANGE_RANGE_CHECK_FRESH2, "FRESH2" } /* file exchange-range tracepoint class */ DECLARE_EVENT_CLASS(xfs_exchrange_class, @@ -4986,6 +4987,60 @@ DEFINE_EXCHRANGE_EVENT(xfs_exchrange_prep); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_flush); DEFINE_EXCHRANGE_EVENT(xfs_exchrange_mappings); +TRACE_EVENT(xfs_exchrange_freshness, + TP_PROTO(const struct xfs_exchrange *fxr, struct xfs_inode *ip2), + TP_ARGS(fxr, ip2), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ip2_ino) + __field(long long, ip2_mtime) + __field(long long, ip2_ctime) + __field(int, ip2_mtime_nsec) + __field(int, ip2_ctime_nsec) + + __field(xfs_ino_t, file2_ino) + __field(long long, file2_mtime) + __field(long long, file2_ctime) + __field(int, file2_mtime_nsec) + __field(int, file2_ctime_nsec) + ), + TP_fast_assign( + struct timespec64 ts64; + struct inode *inode2 = VFS_I(ip2); + + __entry->dev = inode2->i_sb->s_dev; + __entry->ip2_ino = ip2->i_ino; + + ts64 = inode_get_ctime(inode2); + __entry->ip2_ctime = ts64.tv_sec; + __entry->ip2_ctime_nsec = ts64.tv_nsec; + + ts64 = inode_get_mtime(inode2); + __entry->ip2_mtime = ts64.tv_sec; + __entry->ip2_mtime_nsec = ts64.tv_nsec; + + __entry->file2_ino = fxr->file2_ino; + __entry->file2_mtime = fxr->file2_mtime.tv_sec; + __entry->file2_ctime = fxr->file2_ctime.tv_sec; + __entry->file2_mtime_nsec = fxr->file2_mtime.tv_nsec; + __entry->file2_ctime_nsec = fxr->file2_ctime.tv_nsec; + ), + TP_printk("dev %d:%d " + "ino 0x%llx mtime %lld:%d ctime %lld:%d -> " + "file 0x%llx mtime %lld:%d ctime %lld:%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ip2_ino, + __entry->ip2_mtime, + __entry->ip2_mtime_nsec, + __entry->ip2_ctime, + __entry->ip2_ctime_nsec, + __entry->file2_ino, + __entry->file2_mtime, + __entry->file2_mtime_nsec, + __entry->file2_ctime, + __entry->file2_ctime_nsec) +); + TRACE_EVENT(xfs_exchmaps_overhead, TP_PROTO(struct xfs_mount *mp, unsigned long long bmbt_blocks, unsigned long long rmapbt_blocks), |