diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/libxfs/xfs_shared.h | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_trans_space.h | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/bmap.c | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 18 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs_sysfs.h | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 19 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_dquot.c | 2 |
9 files changed, 69 insertions, 23 deletions
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c45acbd3add9..708feb8eac76 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -65,6 +65,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ +#define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ /* * LOWMODE is used by the allocator to activate the lowspace algorithm - when * free space is running low the extent allocator may choose to allocate an diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index 88221c7a04cc..c6df01a2a158 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -57,7 +57,7 @@ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ (M_IGEO(mp)->ialloc_blks + \ - (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ + ((xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1) * \ (M_IGEO(mp)->inobt_maxlevels - 1))) /* diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 7badd6dfe544..955302e7cdde 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -45,9 +45,27 @@ xchk_setup_inode_bmap( */ if (S_ISREG(VFS_I(sc->ip)->i_mode) && sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { + struct address_space *mapping = VFS_I(sc->ip)->i_mapping; + inode_dio_wait(VFS_I(sc->ip)); - error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping); - if (error) + + /* + * Try to flush all incore state to disk before we examine the + * space mappings for the data fork. Leave accumulated errors + * in the mapping for the writer threads to consume. + * + * On ENOSPC or EIO writeback errors, we continue into the + * extent mapping checks because write failures do not + * necessarily imply anything about the correctness of the file + * metadata. The metadata and the file data could be on + * completely separate devices; a media failure might only + * affect a subset of the disk, etc. We can handle delalloc + * extents in the scrubber, so leaving them in memory is fine. + */ + error = filemap_fdatawrite(mapping); + if (!error) + error = filemap_fdatawait_keep_errors(mapping); + if (error && (error != -ENOSPC && error != -EIO)) goto out; } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f37f5cc4b19f..afdc7f8e0e70 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1567,6 +1567,7 @@ xfs_swap_extents( int lock_flags; uint64_t f; int resblks = 0; + unsigned int flags = 0; /* * Lock the inodes against other IO, page faults and truncate to @@ -1630,17 +1631,16 @@ xfs_swap_extents( resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); /* - * Handle the corner case where either inode might straddle the - * btree format boundary. If so, the inode could bounce between - * btree <-> extent format on unmap -> remap cycles, freeing and - * allocating a bmapbt block each time. + * If either inode straddles a bmapbt block allocation boundary, + * the rmapbt algorithm triggers repeated allocs and frees as + * extents are remapped. This can exhaust the block reservation + * prematurely and cause shutdown. Return freed blocks to the + * transaction reservation to counter this behavior. */ - if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(ip, w); - if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) - resblks += XFS_IFORK_MAXEXT(tip, w); + flags |= XFS_TRANS_RES_FDBLKS; } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags, + &tp); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index d6cd83317344..938023dd8ce5 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -148,6 +148,7 @@ xfs_qm_dqpurge( error = xfs_bwrite(bp); xfs_buf_relse(bp); } else if (error == -EAGAIN) { + dqp->dq_flags &= ~XFS_DQ_FREEING; goto out_unlock; } xfs_dqflock(dqp); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 107bf2a2f344..d89201d40891 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1003,6 +1003,7 @@ xfs_reflink_remap_extent( xfs_filblks_t rlen; xfs_filblks_t unmap_len; xfs_off_t newlen; + int64_t qres; int error; unmap_len = irec->br_startoff + irec->br_blockcount - destoff; @@ -1025,13 +1026,19 @@ xfs_reflink_remap_extent( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* If we're not just clearing space, then do we have enough quota? */ - if (real_extent) { - error = xfs_trans_reserve_quota_nblks(tp, ip, - irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS); - if (error) - goto out_cancel; - } + /* + * Reserve quota for this operation. We don't know if the first unmap + * in the dest file will cause a bmap btree split, so we always reserve + * at least enough blocks for that split. If the extent being mapped + * in is written, we need to reserve quota for that too. + */ + qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + if (real_extent) + qres += irec->br_blockcount; + error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0, + XFS_QMOPT_RES_REGBLKS); + if (error) + goto out_cancel; trace_xfs_reflink_remap(ip, irec->br_startoff, irec->br_blockcount, irec->br_startblock); diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index e9f810fc6731..43585850f154 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -32,9 +32,11 @@ xfs_sysfs_init( struct xfs_kobj *parent_kobj, const char *name) { + struct kobject *parent; + + parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, - &parent_kobj->kobject, "%s", name); + return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); } static inline void diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 3c94e5ff4316..0ad72a83edac 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -107,7 +107,8 @@ xfs_trans_dup( ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE) | - (tp->t_flags & XFS_TRANS_NO_WRITECOUNT); + (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) | + (tp->t_flags & XFS_TRANS_RES_FDBLKS); /* We gave our writer reference to the new transaction */ tp->t_flags |= XFS_TRANS_NO_WRITECOUNT; ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); @@ -272,6 +273,8 @@ xfs_trans_alloc( */ WARN_ON(resp->tr_logres > 0 && mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || + xfs_sb_version_haslazysbcount(&mp->m_sb)); tp->t_magic = XFS_TRANS_HEADER_MAGIC; tp->t_flags = flags; @@ -365,6 +368,20 @@ xfs_trans_mod_sb( tp->t_blk_res_used += (uint)-delta; if (tp->t_blk_res_used > tp->t_blk_res) xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) { + int64_t blkres_delta; + + /* + * Return freed blocks directly to the reservation + * instead of the global pool, being careful not to + * overflow the trans counter. This is used to preserve + * reservation across chains of transaction rolls that + * repeatedly free and allocate blocks. + */ + blkres_delta = min_t(int64_t, delta, + UINT_MAX - tp->t_blk_res); + tp->t_blk_res += blkres_delta; + delta -= blkres_delta; } tp->t_fdblocks_delta += delta; if (xfs_sb_version_haslazysbcount(&mp->m_sb)) diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index c0f73b82c055..ed0ce8b301b4 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -647,7 +647,7 @@ xfs_trans_dqresv( } } if (ninos > 0) { - total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos; + total_count = dqp->q_res_icount + ninos; timer = be32_to_cpu(dqp->q_core.d_itimer); warns = be16_to_cpu(dqp->q_core.d_iwarns); warnlimit = defq->iwarnlimit; |