diff options
Diffstat (limited to 'fs/xfs/xfs_icache.c')
-rw-r--r-- | fs/xfs/xfs_icache.c | 438 |
1 files changed, 227 insertions, 211 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index deb99300d171..1d7720a0c068 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -916,69 +916,6 @@ xfs_inode_walk( } /* - * Background scanning to trim post-EOF preallocated space. This is queued - * based on the 'speculative_prealloc_lifetime' tunable (5m by default). - */ -void -xfs_queue_eofblocks( - struct xfs_mount *mp) -{ - rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG)) - queue_delayed_work(mp->m_eofblocks_workqueue, - &mp->m_eofblocks_work, - msecs_to_jiffies(xfs_eofb_secs * 1000)); - rcu_read_unlock(); -} - -void -xfs_eofblocks_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_eofblocks_work); - - if (!sb_start_write_trylock(mp->m_super)) - return; - xfs_icache_free_eofblocks(mp, NULL); - sb_end_write(mp->m_super); - - xfs_queue_eofblocks(mp); -} - -/* - * Background scanning to trim preallocated CoW space. This is queued - * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). - * (We'll just piggyback on the post-EOF prealloc space workqueue.) - */ -void -xfs_queue_cowblocks( - struct xfs_mount *mp) -{ - rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_COWBLOCKS_TAG)) - queue_delayed_work(mp->m_eofblocks_workqueue, - &mp->m_cowblocks_work, - msecs_to_jiffies(xfs_cowb_secs * 1000)); - rcu_read_unlock(); -} - -void -xfs_cowblocks_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_cowblocks_work); - - if (!sb_start_write_trylock(mp->m_super)) - return; - xfs_icache_free_cowblocks(mp, NULL); - sb_end_write(mp->m_super); - - xfs_queue_cowblocks(mp); -} - -/* * Grab the inode for reclaim exclusively. * * We have found this inode via a lookup under RCU, so the inode may have @@ -1346,14 +1283,17 @@ xfs_reclaim_worker( STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, - void *args) + void *args, + unsigned int *lockflags) { struct xfs_eofblocks *eofb = args; bool wait; - int ret; wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC); + if (!xfs_iflags_test(ip, XFS_IEOFBLOCKS)) + return 0; + if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); @@ -1380,130 +1320,68 @@ xfs_inode_free_eofblocks( return -EAGAIN; return 0; } + *lockflags |= XFS_IOLOCK_EXCL; - ret = xfs_free_eofblocks(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - - return ret; -} - -int -xfs_icache_free_eofblocks( - struct xfs_mount *mp, - struct xfs_eofblocks *eofb) -{ - return xfs_inode_walk(mp, 0, xfs_inode_free_eofblocks, eofb, - XFS_ICI_EOFBLOCKS_TAG); + return xfs_free_eofblocks(ip); } /* - * Run eofblocks scans on the quotas applicable to the inode. For inodes with - * multiple quotas, we don't know exactly which quota caused an allocation - * failure. We make a best effort by including each quota under low free space - * conditions (less than 1% free space) in the scan. + * Background scanning to trim preallocated space. This is queued based on the + * 'speculative_prealloc_lifetime' tunable (5m by default). */ -static int -__xfs_inode_free_quota_eofblocks( - struct xfs_inode *ip, - int (*execute)(struct xfs_mount *mp, - struct xfs_eofblocks *eofb)) -{ - int scan = 0; - struct xfs_eofblocks eofb = {0}; - struct xfs_dquot *dq; - - /* - * Run a sync scan to increase effectiveness and use the union filter to - * cover all applicable quotas in a single scan. - */ - eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; - - if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { - dq = xfs_inode_dquot(ip, XFS_DQTYPE_USER); - if (dq && xfs_dquot_lowsp(dq)) { - eofb.eof_uid = VFS_I(ip)->i_uid; - eofb.eof_flags |= XFS_EOF_FLAGS_UID; - scan = 1; - } - } - - if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) { - dq = xfs_inode_dquot(ip, XFS_DQTYPE_GROUP); - if (dq && xfs_dquot_lowsp(dq)) { - eofb.eof_gid = VFS_I(ip)->i_gid; - eofb.eof_flags |= XFS_EOF_FLAGS_GID; - scan = 1; - } - } - - if (scan) - execute(ip->i_mount, &eofb); - - return scan; -} - -int -xfs_inode_free_quota_eofblocks( - struct xfs_inode *ip) -{ - return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); -} - -static inline unsigned long -xfs_iflag_for_tag( - int tag) +static inline void +xfs_blockgc_queue( + struct xfs_perag *pag) { - switch (tag) { - case XFS_ICI_EOFBLOCKS_TAG: - return XFS_IEOFBLOCKS; - case XFS_ICI_COWBLOCKS_TAG: - return XFS_ICOWBLOCKS; - default: - ASSERT(0); - return 0; - } + rcu_read_lock(); + if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) + queue_delayed_work(pag->pag_mount->m_blockgc_workqueue, + &pag->pag_blockgc_work, + msecs_to_jiffies(xfs_blockgc_secs * 1000)); + rcu_read_unlock(); } static void -__xfs_inode_set_blocks_tag( - xfs_inode_t *ip, - void (*execute)(struct xfs_mount *mp), - void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, - int error, unsigned long caller_ip), - int tag) +xfs_blockgc_set_iflag( + struct xfs_inode *ip, + unsigned long iflag) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; - int tagged; + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + int tagged; + + ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0); /* * Don't bother locking the AG and looking up in the radix trees * if we already know that we have the tag set. */ - if (ip->i_flags & xfs_iflag_for_tag(tag)) + if (ip->i_flags & iflag) return; spin_lock(&ip->i_flags_lock); - ip->i_flags |= xfs_iflag_for_tag(tag); + ip->i_flags |= iflag; spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); - tagged = radix_tree_tagged(&pag->pag_ici_root, tag); + tagged = radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG); radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_BLOCKGC_TAG); if (!tagged) { - /* propagate the eofblocks tag up into the perag radix tree */ + /* propagate the blockgc tag up into the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_set(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - tag); + XFS_ICI_BLOCKGC_TAG); spin_unlock(&ip->i_mount->m_perag_lock); /* kick off background trimming */ - execute(ip->i_mount); + xfs_blockgc_queue(pag); - set_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); + trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1, + _RET_IP_); } spin_unlock(&pag->pag_ici_lock); @@ -1515,38 +1393,43 @@ xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_eofblocks_tag(ip); - return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks, - trace_xfs_perag_set_eofblocks, - XFS_ICI_EOFBLOCKS_TAG); + return xfs_blockgc_set_iflag(ip, XFS_IEOFBLOCKS); } static void -__xfs_inode_clear_blocks_tag( - xfs_inode_t *ip, - void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, - int error, unsigned long caller_ip), - int tag) +xfs_blockgc_clear_iflag( + struct xfs_inode *ip, + unsigned long iflag) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + bool clear_tag; + + ASSERT((iflag & ~(XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0); spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~xfs_iflag_for_tag(tag); + ip->i_flags &= ~iflag; + clear_tag = (ip->i_flags & (XFS_IEOFBLOCKS | XFS_ICOWBLOCKS)) == 0; spin_unlock(&ip->i_flags_lock); + if (!clear_tag) + return; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag); - if (!radix_tree_tagged(&pag->pag_ici_root, tag)) { - /* clear the eofblocks tag from the perag radix tree */ + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_BLOCKGC_TAG); + if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG)) { + /* clear the blockgc tag from the perag radix tree */ spin_lock(&ip->i_mount->m_perag_lock); radix_tree_tag_clear(&ip->i_mount->m_perag_tree, XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - tag); + XFS_ICI_BLOCKGC_TAG); spin_unlock(&ip->i_mount->m_perag_lock); - clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_); + trace_xfs_perag_clear_blockgc(ip->i_mount, pag->pag_agno, -1, + _RET_IP_); } spin_unlock(&pag->pag_ici_lock); @@ -1558,8 +1441,7 @@ xfs_inode_clear_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_eofblocks_tag(ip); - return __xfs_inode_clear_blocks_tag(ip, - trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); + return xfs_blockgc_clear_iflag(ip, XFS_IEOFBLOCKS); } /* @@ -1609,20 +1491,42 @@ xfs_prep_free_cowblocks( STATIC int xfs_inode_free_cowblocks( struct xfs_inode *ip, - void *args) + void *args, + unsigned int *lockflags) { struct xfs_eofblocks *eofb = args; + bool wait; int ret = 0; + wait = eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC); + + if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS)) + return 0; + if (!xfs_prep_free_cowblocks(ip)) return 0; if (!xfs_inode_matches_eofb(ip, eofb)) return 0; - /* Free the CoW blocks */ - xfs_ilock(ip, XFS_IOLOCK_EXCL); - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + /* + * If the caller is waiting, return -EAGAIN to keep the background + * scanner moving and revisit the inode in a subsequent pass. + */ + if (!(*lockflags & XFS_IOLOCK_EXCL) && + !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { + if (wait) + return -EAGAIN; + return 0; + } + *lockflags |= XFS_IOLOCK_EXCL; + + if (!xfs_ilock_nowait(ip, XFS_MMAPLOCK_EXCL)) { + if (wait) + return -EAGAIN; + return 0; + } + *lockflags |= XFS_MMAPLOCK_EXCL; /* * Check again, nobody else should be able to dirty blocks or change @@ -1630,37 +1534,15 @@ xfs_inode_free_cowblocks( */ if (xfs_prep_free_cowblocks(ip)) ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); - - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; } -int -xfs_icache_free_cowblocks( - struct xfs_mount *mp, - struct xfs_eofblocks *eofb) -{ - return xfs_inode_walk(mp, 0, xfs_inode_free_cowblocks, eofb, - XFS_ICI_COWBLOCKS_TAG); -} - -int -xfs_inode_free_quota_cowblocks( - struct xfs_inode *ip) -{ - return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_cowblocks); -} - void xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_cowblocks_tag(ip); - return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks, - trace_xfs_perag_set_cowblocks, - XFS_ICI_COWBLOCKS_TAG); + return xfs_blockgc_set_iflag(ip, XFS_ICOWBLOCKS); } void @@ -1668,24 +1550,158 @@ xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_cowblocks_tag(ip); - return __xfs_inode_clear_blocks_tag(ip, - trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); + return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS); } +#define for_each_perag_tag(mp, next_agno, pag, tag) \ + for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \ + (pag) != NULL; \ + (next_agno) = (pag)->pag_agno + 1, \ + xfs_perag_put(pag), \ + (pag) = xfs_perag_get_tag((mp), (next_agno), (tag))) + + /* Disable post-EOF and CoW block auto-reclamation. */ void -xfs_stop_block_reaping( +xfs_blockgc_stop( struct xfs_mount *mp) { - cancel_delayed_work_sync(&mp->m_eofblocks_work); - cancel_delayed_work_sync(&mp->m_cowblocks_work); + struct xfs_perag *pag; + xfs_agnumber_t agno; + + for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + cancel_delayed_work_sync(&pag->pag_blockgc_work); } /* Enable post-EOF and CoW block auto-reclamation. */ void -xfs_start_block_reaping( +xfs_blockgc_start( struct xfs_mount *mp) { - xfs_queue_eofblocks(mp); - xfs_queue_cowblocks(mp); + struct xfs_perag *pag; + xfs_agnumber_t agno; + + for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) + xfs_blockgc_queue(pag); +} + +/* Scan one incore inode for block preallocations that we can remove. */ +static int +xfs_blockgc_scan_inode( + struct xfs_inode *ip, + void *args) +{ + unsigned int lockflags = 0; + int error; + + error = xfs_inode_free_eofblocks(ip, args, &lockflags); + if (error) + goto unlock; + + error = xfs_inode_free_cowblocks(ip, args, &lockflags); +unlock: + if (lockflags) + xfs_iunlock(ip, lockflags); + return error; +} + +/* Background worker that trims preallocated space. */ +void +xfs_blockgc_worker( + struct work_struct *work) +{ + struct xfs_perag *pag = container_of(to_delayed_work(work), + struct xfs_perag, pag_blockgc_work); + struct xfs_mount *mp = pag->pag_mount; + int error; + + if (!sb_start_write_trylock(mp->m_super)) + return; + error = xfs_inode_walk_ag(pag, 0, xfs_blockgc_scan_inode, NULL, + XFS_ICI_BLOCKGC_TAG); + if (error) + xfs_info(mp, "AG %u preallocation gc worker failed, err=%d", + pag->pag_agno, error); + sb_end_write(mp->m_super); + xfs_blockgc_queue(pag); +} + +/* + * Try to free space in the filesystem by purging eofblocks and cowblocks. + */ +int +xfs_blockgc_free_space( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + trace_xfs_blockgc_free_space(mp, eofb, _RET_IP_); + + return xfs_inode_walk(mp, 0, xfs_blockgc_scan_inode, eofb, + XFS_ICI_BLOCKGC_TAG); +} + +/* + * Run cow/eofblocks scans on the supplied dquots. We don't know exactly which + * quota caused an allocation failure, so we make a best effort by including + * each quota under low free space conditions (less than 1% free space) in the + * scan. + * + * Callers must not hold any inode's ILOCK. If requesting a synchronous scan + * (XFS_EOF_FLAGS_SYNC), the caller also must not hold any inode's IOLOCK or + * MMAPLOCK. + */ +int +xfs_blockgc_free_dquots( + struct xfs_mount *mp, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, + struct xfs_dquot *pdqp, + unsigned int eof_flags) +{ + struct xfs_eofblocks eofb = {0}; + bool do_work = false; + + if (!udqp && !gdqp && !pdqp) + return 0; + + /* + * Run a scan to free blocks using the union filter to cover all + * applicable quotas in a single scan. + */ + eofb.eof_flags = XFS_EOF_FLAGS_UNION | eof_flags; + + if (XFS_IS_UQUOTA_ENFORCED(mp) && udqp && xfs_dquot_lowsp(udqp)) { + eofb.eof_uid = make_kuid(mp->m_super->s_user_ns, udqp->q_id); + eofb.eof_flags |= XFS_EOF_FLAGS_UID; + do_work = true; + } + + if (XFS_IS_UQUOTA_ENFORCED(mp) && gdqp && xfs_dquot_lowsp(gdqp)) { + eofb.eof_gid = make_kgid(mp->m_super->s_user_ns, gdqp->q_id); + eofb.eof_flags |= XFS_EOF_FLAGS_GID; + do_work = true; + } + + if (XFS_IS_PQUOTA_ENFORCED(mp) && pdqp && xfs_dquot_lowsp(pdqp)) { + eofb.eof_prid = pdqp->q_id; + eofb.eof_flags |= XFS_EOF_FLAGS_PRID; + do_work = true; + } + + if (!do_work) + return 0; + + return xfs_blockgc_free_space(mp, &eofb); +} + +/* Run cow/eofblocks scans on the quotas attached to the inode. */ +int +xfs_blockgc_free_quota( + struct xfs_inode *ip, + unsigned int eof_flags) +{ + return xfs_blockgc_free_dquots(ip->i_mount, + xfs_inode_dquot(ip, XFS_DQTYPE_USER), + xfs_inode_dquot(ip, XFS_DQTYPE_GROUP), + xfs_inode_dquot(ip, XFS_DQTYPE_PROJ), eof_flags); } |