Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/Kconfig                      |   3
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c           | 128
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.h           |  10
-rw-r--r--  fs/xfs/libxfs/xfs_alloc_btree.c     |  47
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c            |  24
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c       | 157
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.h       |   4
-rw-r--r--  fs/xfs/libxfs/xfs_attr_remote.c     | 104
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c            | 128
-rw-r--r--  fs/xfs/libxfs/xfs_bmap_btree.c      |  58
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c           | 117
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h           |  16
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c        |  70
-rw-r--r--  fs/xfs/libxfs/xfs_da_format.h       |   6
-rw-r--r--  fs/xfs/libxfs/xfs_defer.c           |  39
-rw-r--r--  fs/xfs/libxfs/xfs_defer.h           |   5
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.c            |   5
-rw-r--r--  fs/xfs/libxfs/xfs_dir2.h            |   2
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_block.c      |  39
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_data.c       | 208
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_leaf.c       |  89
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_node.c       |  89
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_priv.h       |  12
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_sf.c         |  30
-rw-r--r--  fs/xfs/libxfs/xfs_dquot_buf.c       | 174
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h              |   7
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c          | 153
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.h          |   7
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c    |  65
-rw-r--r--  fs/xfs/libxfs/xfs_iext_tree.c       |   4
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c       | 135
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.h       |   4
-rw-r--r--  fs/xfs/libxfs/xfs_inode_fork.c      | 152
-rw-r--r--  fs/xfs/libxfs/xfs_inode_fork.h      |  14
-rw-r--r--  fs/xfs/libxfs/xfs_log_rlimit.c      |   2
-rw-r--r--  fs/xfs/libxfs/xfs_quota_defs.h      |   9
-rw-r--r--  fs/xfs/libxfs/xfs_refcount.c        |  71
-rw-r--r--  fs/xfs/libxfs/xfs_refcount.h        |   3
-rw-r--r--  fs/xfs/libxfs/xfs_refcount_btree.c  |  40
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.c            | 166
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.h            |  21
-rw-r--r--  fs/xfs/libxfs/xfs_rmap_btree.c      |  40
-rw-r--r--  fs/xfs/libxfs/xfs_rtbitmap.c        |  21
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c              | 113
-rw-r--r--  fs/xfs/libxfs/xfs_sb.h              |   4
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h          |   4
-rw-r--r--  fs/xfs/libxfs/xfs_symlink_remote.c  |  75
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.c      | 199
-rw-r--r--  fs/xfs/scrub/agheader.c             | 340
-rw-r--r--  fs/xfs/scrub/alloc.c                |  81
-rw-r--r--  fs/xfs/scrub/bmap.c                 | 219
-rw-r--r--  fs/xfs/scrub/btree.c                | 184
-rw-r--r--  fs/xfs/scrub/btree.h                |   9
-rw-r--r--  fs/xfs/scrub/common.c               | 255
-rw-r--r--  fs/xfs/scrub/common.h               |  23
-rw-r--r--  fs/xfs/scrub/dabtree.c              |  22
-rw-r--r--  fs/xfs/scrub/dir.c                  |  44
-rw-r--r--  fs/xfs/scrub/ialloc.c               | 194
-rw-r--r--  fs/xfs/scrub/inode.c                | 192
-rw-r--r--  fs/xfs/scrub/parent.c               |   8
-rw-r--r--  fs/xfs/scrub/quota.c                |  11
-rw-r--r--  fs/xfs/scrub/refcount.c             | 420
-rw-r--r--  fs/xfs/scrub/rmap.c                 | 123
-rw-r--r--  fs/xfs/scrub/rtbitmap.c             |  35
-rw-r--r--  fs/xfs/scrub/scrub.c                | 204
-rw-r--r--  fs/xfs/scrub/scrub.h                |  37
-rw-r--r--  fs/xfs/scrub/trace.c                |   1
-rw-r--r--  fs/xfs/scrub/trace.h                |  44
-rw-r--r--  fs/xfs/xfs_aops.c                   |  27
-rw-r--r--  fs/xfs/xfs_bmap_item.c              |  23
-rw-r--r--  fs/xfs/xfs_bmap_item.h              |   3
-rw-r--r--  fs/xfs/xfs_bmap_util.c              |   4
-rw-r--r--  fs/xfs/xfs_buf.c                    |  37
-rw-r--r--  fs/xfs/xfs_buf.h                    |   8
-rw-r--r--  fs/xfs/xfs_buf_item.c               | 156
-rw-r--r--  fs/xfs/xfs_buf_item.h               |   7
-rw-r--r--  fs/xfs/xfs_dir2_readdir.c           |   4
-rw-r--r--  fs/xfs/xfs_dquot.c                  |  76
-rw-r--r--  fs/xfs/xfs_dquot_item.c             |  39
-rw-r--r--  fs/xfs/xfs_error.c                  |  64
-rw-r--r--  fs/xfs/xfs_error.h                  |  14
-rw-r--r--  fs/xfs/xfs_extfree_item.c           |   2
-rw-r--r--  fs/xfs/xfs_fsops.c                  |  84
-rw-r--r--  fs/xfs/xfs_fsops.h                  |   1
-rw-r--r--  fs/xfs/xfs_icache.c                 | 110
-rw-r--r--  fs/xfs/xfs_icache.h                 |   1
-rw-r--r--  fs/xfs/xfs_inode.c                  | 189
-rw-r--r--  fs/xfs/xfs_inode.h                  |   8
-rw-r--r--  fs/xfs/xfs_inode_item.c             |  44
-rw-r--r--  fs/xfs/xfs_ioctl.c                  |   5
-rw-r--r--  fs/xfs/xfs_ioctl32.c                |   3
-rw-r--r--  fs/xfs/xfs_iomap.c                  |   4
-rw-r--r--  fs/xfs/xfs_linux.h                  |  14
-rw-r--r--  fs/xfs/xfs_log.c                    |  23
-rw-r--r--  fs/xfs/xfs_log_recover.c            | 133
-rw-r--r--  fs/xfs/xfs_mount.c                  |   3
-rw-r--r--  fs/xfs/xfs_qm.c                     |  83
-rw-r--r--  fs/xfs/xfs_refcount_item.c          |  21
-rw-r--r--  fs/xfs/xfs_refcount_item.h          |   3
-rw-r--r--  fs/xfs/xfs_reflink.c                | 118
-rw-r--r--  fs/xfs/xfs_rtalloc.h                |   4
-rw-r--r--  fs/xfs/xfs_super.c                  |  31
-rw-r--r--  fs/xfs/xfs_super.h                  |   2
-rw-r--r--  fs/xfs/xfs_symlink.c                |  15
-rw-r--r--  fs/xfs/xfs_trace.c                  |   1
-rw-r--r--  fs/xfs/xfs_trace.h                  |  68
-rw-r--r--  fs/xfs/xfs_trans.c                  |  22
-rw-r--r--  fs/xfs/xfs_trans.h                  |   2
-rw-r--r--  fs/xfs/xfs_trans_buf.c              |  98
-rw-r--r--  fs/xfs/xfs_trans_inode.c            |  16
110 files changed, 5084 insertions(+), 1805 deletions(-)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index f42fcf1b5465..46bcf0e649f5 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -48,9 +48,6 @@ config XFS_POSIX_ACL
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
- To learn more about Access Control Lists, visit the POSIX ACLs for
- Linux website <http://acl.bestbits.at/>.
-
If you don't know what Access Control Lists are, say N.
config XFS_RT
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..c02781a4c091 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -167,7 +167,7 @@ xfs_alloc_lookup_ge(
* Lookup the first record less than or equal to [bno, len]
* in the btree given by cur.
*/
-static int /* error */
+int /* error */
xfs_alloc_lookup_le(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
@@ -520,7 +520,7 @@ xfs_alloc_fixup_trees(
return 0;
}
-static bool
+static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
{
@@ -528,10 +528,19 @@ xfs_agfl_verify(
struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
int i;
+ /*
+ * There is no verification of non-crc AGFLs because mkfs does not
+ * initialise the AGFL to zero or NULL. Hence the only valid part of the
+ * AGFL is what the AGF says is active. We can't get to the AGF, so we
+ * can't verify just those entries are valid.
+ */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return NULL;
+
if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
- return false;
+ return __this_address;
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -539,16 +548,17 @@ xfs_agfl_verify(
* so we can detect and avoid this problem.
*/
if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
- return false;
+ return __this_address;
for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
- return false;
+ return __this_address;
}
- return xfs_log_check_lsn(mp,
- be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
+ return __this_address;
+ return NULL;
}
static void
@@ -556,6 +566,7 @@ xfs_agfl_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
/*
* There is no verification of non-crc AGFLs because mkfs does not
@@ -567,28 +578,29 @@ xfs_agfl_read_verify(
return;
if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_agfl_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_agfl_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
xfs_agfl_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
/* no verification of non-crc AGFLs */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_agfl_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_agfl_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -602,6 +614,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
.name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
+ .verify_struct = xfs_agfl_verify,
};
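
This hunk completes the verifier conversion pattern that repeats throughout the rest of this patch: the structural check returns an xfs_failaddr_t (NULL on success, otherwise the address of the failing test captured by __this_address), the read/write verifiers pass that address to the reworked xfs_verifier_error(), and the bare check is exported through the new ->verify_struct hook so callers such as scrub can validate a buffer without triggering the I/O error machinery. A minimal sketch of the shape, assuming invented stand-ins (xfs_foo_*, struct xfs_foo_hdr, XFS_FOO_MAGIC, XFS_FOO_CRC_OFF):

	/* Sketch only: all xfs_foo_* names are hypothetical stand-ins. */
	static xfs_failaddr_t
	xfs_foo_verify(
		struct xfs_buf		*bp)
	{
		struct xfs_foo_hdr	*hdr = bp->b_addr;

		if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
			return __this_address;	/* records which check tripped */
		return NULL;			/* NULL means the block is good */
	}

	static void
	xfs_foo_read_verify(
		struct xfs_buf		*bp)
	{
		xfs_failaddr_t		fa;

		if (!xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
			xfs_verifier_error(bp, -EFSBADCRC, __this_address);
		else {
			fa = xfs_foo_verify(bp);
			if (fa)
				xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		}
	}

	static void
	xfs_foo_write_verify(
		struct xfs_buf		*bp)
	{
		xfs_failaddr_t		fa;

		fa = xfs_foo_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		/* a real write verifier also stamps the LSN and CRC here */
	}

	const struct xfs_buf_ops xfs_foo_buf_ops = {
		.name		= "xfs_foo",
		.verify_read	= xfs_foo_read_verify,
		.verify_write	= xfs_foo_write_verify,
		.verify_struct	= xfs_foo_verify,
	};
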
/*
@@ -702,7 +715,7 @@ xfs_alloc_ag_vextent(
ASSERT(args->agbno % args->alignment == 0);
/* if not file data, insert new block into the reverse map btree */
- if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+ if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
args->agbno, args->len, &args->oinfo);
if (error)
@@ -1682,7 +1695,7 @@ xfs_free_ag_extent(
bno_cur = cnt_cur = NULL;
mp = tp->t_mountp;
- if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+ if (!xfs_rmap_should_skip_owner_update(oinfo)) {
error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
if (error)
goto error0;
@@ -2397,19 +2410,19 @@ xfs_alloc_put_freelist(
return 0;
}
-static bool
+static xfs_failaddr_t
xfs_agf_verify(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
- {
- struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp,
be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
- return false;
+ return __this_address;
}
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
@@ -2418,18 +2431,18 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
- return false;
+ return __this_address;
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
- return false;
+ return __this_address;
if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
(be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
- return false;
+ return __this_address;
/*
* during growfs operations, the perag is not fully initialised,
@@ -2438,18 +2451,18 @@ xfs_agf_verify(
* so we can detect and avoid this problem.
*/
if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
- return false;
+ return __this_address;
if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
- return false;
+ return __this_address;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
- return false;
+ return __this_address;
- return true;;
+ return NULL;
}
@@ -2458,28 +2471,29 @@ xfs_agf_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
- XFS_ERRTAG_ALLOC_READ_AGF))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_agf_verify(bp);
+ if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
xfs_agf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
- if (!xfs_agf_verify(mp, bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_agf_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -2496,6 +2510,7 @@ const struct xfs_buf_ops xfs_agf_buf_ops = {
.name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
+ .verify_struct = xfs_agf_verify,
};
/*
@@ -2981,3 +2996,22 @@ xfs_verify_fsbno(
return false;
return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
}
+
+/* Is there a record covering a given extent? */
+int
+xfs_alloc_has_record(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+
+ memset(&low, 0, sizeof(low));
+ low.a.ar_startblock = bno;
+ memset(&high, 0xFF, sizeof(high));
+ high.a.ar_startblock = bno + len - 1;
+
+ return xfs_btree_has_record(cur, &low, &high, exists);
+}
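
Note that xfs_alloc_has_record() answers "does any free-space record overlap [bno, bno + len - 1]?", not "is the whole range free?", because the underlying query aborts on the first record it finds. A hedged sketch of a caller in the style of the scrub cross-referencing this series builds toward (the function name is invented):

	/* Hypothetical caller: flag an extent that overlaps known free space. */
	STATIC int
	xfs_check_not_free(
		struct xfs_btree_cur	*bno_cur,	/* bnobt cursor */
		xfs_agblock_t		bno,
		xfs_extlen_t		len)
	{
		bool			overlaps;
		int			error;

		error = xfs_alloc_has_record(bno_cur, bno, len, &overlaps);
		if (error)
			return error;
		/* Any overlap means part of [bno, bno + len) is free space. */
		return overlaps ? -EFSCORRUPTED : 0;
	}
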
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 7ba2d129d504..65a0cafe06e4 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -198,6 +198,13 @@ xfs_free_extent(
enum xfs_ag_resv_type type); /* block reservation type */
int /* error */
+xfs_alloc_lookup_le(
+ struct xfs_btree_cur *cur, /* btree cursor */
+ xfs_agblock_t bno, /* starting block of extent */
+ xfs_extlen_t len, /* length of extent */
+ int *stat); /* success/failure */
+
+int /* error */
xfs_alloc_lookup_ge(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
@@ -237,4 +244,7 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno);
bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exist);
+
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index cfde0a0f9706..6840b588187e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -307,13 +307,14 @@ xfs_cntbt_diff_two_keys(
be32_to_cpu(k2->alloc.ar_startblock);
}
-static bool
+static xfs_failaddr_t
xfs_allocbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
+ xfs_failaddr_t fa;
unsigned int level;
/*
@@ -331,29 +332,31 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
- if (!xfs_btree_sblock_v5hdr_verify(bp))
- return false;
+ fa = xfs_btree_sblock_v5hdr_verify(bp);
+ if (fa)
+ return fa;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
- return false;
+ return __this_address;
} else if (level >= mp->m_ag_maxlevels)
- return false;
+ return __this_address;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
- if (!xfs_btree_sblock_v5hdr_verify(bp))
- return false;
+ fa = xfs_btree_sblock_v5hdr_verify(bp);
+ if (fa)
+ return fa;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
- return false;
+ return __this_address;
} else if (level >= mp->m_ag_maxlevels)
- return false;
+ return __this_address;
break;
default:
- return false;
+ return __this_address;
}
return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
@@ -363,25 +366,30 @@ static void
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
+ xfs_failaddr_t fa;
+
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_allocbt_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_allocbt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
- if (bp->b_error) {
+ if (bp->b_error)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_verifier_error(bp);
- }
}
static void
xfs_allocbt_write_verify(
struct xfs_buf *bp)
{
- if (!xfs_allocbt_verify(bp)) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_allocbt_verify(bp);
+ if (fa) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
xfs_btree_sblock_calc_crc(bp);
@@ -392,6 +400,7 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
.name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
+ .verify_struct = xfs_allocbt_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..ce4a34a2751d 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
int flags)
{
struct xfs_mount *mp = dp->i_mount;
+ struct xfs_buf *leaf_bp = NULL;
struct xfs_da_args args;
struct xfs_defer_ops dfops;
struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
* GROT: another possible req'mt for a double-split btree op.
*/
xfs_defer_init(args.dfops, args.firstblock);
- error = xfs_attr_shortform_to_leaf(&args);
+ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
if (error)
goto out_defer_cancel;
+ /*
+ * Prevent the leaf buffer from being unlocked so that a
+ * concurrent AIL push cannot grab the half-baked leaf
+ * buffer and run into problems with the write verifier.
+ */
+ xfs_trans_bhold(args.trans, leaf_bp);
+ xfs_defer_bjoin(args.dfops, leaf_bp);
xfs_defer_ijoin(args.dfops, dp);
error = xfs_defer_finish(&args.trans, args.dfops);
if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
/*
* Commit the leaf transformation. We'll need another (linked)
- * transaction to add the new attribute to the leaf.
+ * transaction to add the new attribute to the leaf, which
+ * means that we have to hold & join the leaf buffer here too.
*/
-
error = xfs_trans_roll_inode(&args.trans, dp);
if (error)
goto out;
-
+ xfs_trans_bjoin(args.trans, leaf_bp);
+ leaf_bp = NULL;
}
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
out_defer_cancel:
xfs_defer_cancel(&dfops);
- args.trans = NULL;
out:
+ if (leaf_bp)
+ xfs_trans_brelse(args.trans, leaf_bp);
if (args.trans)
xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -707,7 +717,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
return error;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
- args->trans = NULL;
return error;
}
@@ -760,7 +769,6 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
return 0;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
- args->trans = NULL;
return error;
}
@@ -1035,7 +1043,6 @@ out:
return retval;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
- args->trans = NULL;
goto out;
}
@@ -1176,7 +1183,6 @@ out:
return error;
out_defer_cancel:
xfs_defer_cancel(args->dfops);
- args->trans = NULL;
goto out;
}
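
The leaf_bp handling added above is the caller side of the new deferred-ops buffer protocol introduced later in this patch (xfs_defer_bjoin): hold the half-built leaf buffer so committing the transaction cannot unlock it, register it so every roll relogs it, and rejoin it to the final transaction before dropping the local reference. Condensed into a sketch of the sequence (error labels abbreviated; this mirrors the hunks above rather than adding anything new):

	struct xfs_buf	*leaf_bp = NULL;

	error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
	if (error)
		goto out_defer_cancel;
	/* keep leaf_bp locked across the commit of this transaction */
	xfs_trans_bhold(args.trans, leaf_bp);
	/* relog leaf_bp on every transaction roll under defer_finish */
	xfs_defer_bjoin(args.dfops, leaf_bp);
	error = xfs_defer_finish(&args.trans, args.dfops);
	if (error)
		goto out_defer_cancel;
	error = xfs_trans_roll_inode(&args.trans, dp);
	if (error)
		goto out;
	/* hand the held buffer to the new transaction; drop our reference */
	xfs_trans_bjoin(args.trans, leaf_bp);
	leaf_bp = NULL;
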
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..2135b8e67dcc 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -247,14 +247,15 @@ xfs_attr3_leaf_hdr_to_disk(
}
}
-static bool
+static xfs_failaddr_t
xfs_attr3_leaf_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_attr_leafblock *leaf = bp->b_addr;
- struct xfs_perag *pag = bp->b_pag;
- struct xfs_attr3_icleaf_hdr ichdr;
+ struct xfs_attr3_icleaf_hdr ichdr;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_attr_leafblock *leaf = bp->b_addr;
+ struct xfs_perag *pag = bp->b_pag;
+ struct xfs_attr_leaf_entry *entries;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@@ -262,17 +263,17 @@ xfs_attr3_leaf_verify(
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
- return false;
+ return __this_address;
if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
- return false;
+ return __this_address;
} else {
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
- return false;
+ return __this_address;
}
/*
* In recovery there is a transient state where count == 0 is valid
@@ -280,12 +281,27 @@ xfs_attr3_leaf_verify(
* if the attr didn't fit in shortform.
*/
if (pag && pag->pagf_init && ichdr.count == 0)
- return false;
+ return __this_address;
+
+ /*
+ * firstused is the block offset of the first name info structure.
+ * Make sure it doesn't go off the block or crash into the header.
+ */
+ if (ichdr.firstused > mp->m_attr_geo->blksize)
+ return __this_address;
+ if (ichdr.firstused < xfs_attr3_leaf_hdr_size(leaf))
+ return __this_address;
+
+ /* Make sure the entries array doesn't crash into the name info. */
+ entries = xfs_attr3_leaf_entryp(bp->b_addr);
+ if ((char *)&entries[ichdr.count] >
+ (char *)bp->b_addr + ichdr.firstused)
+ return __this_address;
/* XXX: need to range check rest of attr header values */
/* XXX: hash order check? */
- return true;
+ return NULL;
}
static void
@@ -293,12 +309,13 @@ xfs_attr3_leaf_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_attr3_leaf_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_attr3_leaf_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -322,21 +339,23 @@ xfs_attr3_leaf_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_attr3_leaf_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_attr3_leaf_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
.name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
+ .verify_struct = xfs_attr3_leaf_verify,
};
int
@@ -735,10 +754,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
}
/*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf. On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
*/
int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+ struct xfs_da_args *args,
+ struct xfs_buf **leaf_bp)
{
xfs_inode_t *dp;
xfs_attr_shortform_t *sf;
@@ -818,7 +840,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
error = 0;
-
+ *leaf_bp = bp;
out:
kmem_free(tmpbuffer);
return error;
@@ -867,6 +889,80 @@ xfs_attr_shortform_allfit(
return xfs_attr_shortform_bytesfit(dp, bytes);
}
+/* Verify the consistency of an inline attribute fork. */
+xfs_failaddr_t
+xfs_attr_shortform_verify(
+ struct xfs_inode *ip)
+{
+ struct xfs_attr_shortform *sfp;
+ struct xfs_attr_sf_entry *sfep;
+ struct xfs_attr_sf_entry *next_sfep;
+ char *endp;
+ struct xfs_ifork *ifp;
+ int i;
+ int size;
+
+ ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
+ ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+ sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data;
+ size = ifp->if_bytes;
+
+ /*
+ * Give up if the attribute is way too short.
+ */
+ if (size < sizeof(struct xfs_attr_sf_hdr))
+ return __this_address;
+
+ endp = (char *)sfp + size;
+
+ /* Check all reported entries */
+ sfep = &sfp->list[0];
+ for (i = 0; i < sfp->hdr.count; i++) {
+ /*
+ * struct xfs_attr_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ if (((char *)sfep + sizeof(*sfep)) >= endp)
+ return __this_address;
+
+ /* Don't allow names with known bad length. */
+ if (sfep->namelen == 0)
+ return __this_address;
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = XFS_ATTR_SF_NEXTENTRY(sfep);
+ if ((char *)next_sfep > endp)
+ return __this_address;
+
+ /*
+ * Check for unknown flags. Short form doesn't support
+ * the incomplete or local bits, so we can use the namespace
+ * mask here.
+ */
+ if (sfep->flags & ~XFS_ATTR_NSP_ONDISK_MASK)
+ return __this_address;
+
+ /*
+ * Check for invalid namespace combinations. We only allow
+ * one namespace flag per xattr, so we can just count the
+ * bits (i.e. hweight) here.
+ */
+ if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
+ return __this_address;
+
+ sfep = next_sfep;
+ }
+ if ((void *)sfep != (void *)endp)
+ return __this_address;
+
+ return NULL;
+}
+
/*
* Convert a leaf attribute list to shortform attribute list
*/
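
xfs_attr_shortform_verify() above walks variable-length entries, checking that the fixed part of each record fits before any field is read, that the variable part fits before the walk advances, and finally that the walk consumes the fork exactly. The same defensive idiom, reduced to a self-contained sketch (struct my_sf_entry and its layout are invented for illustration):

	#include <stddef.h>
	#include <stdint.h>

	/* Invented on-disk-style record: fixed header + variable-length name. */
	struct my_sf_entry {
		uint8_t	namelen;
		uint8_t	flags;
		uint8_t	name[];			/* namelen bytes follow */
	};

	/* Return 0 if all entries fit inside buf[0..size), -1 otherwise. */
	static int
	walk_entries(const uint8_t *buf, size_t size, unsigned int count)
	{
		const uint8_t			*endp = buf + size;
		const struct my_sf_entry	*sfep = (const void *)buf;
		unsigned int			i;

		for (i = 0; i < count; i++) {
			/* Fixed part must fit before we may read namelen. */
			if ((const uint8_t *)sfep + sizeof(*sfep) > endp)
				return -1;
			if (sfep->namelen == 0)	/* reject known-bad lengths */
				return -1;
			/* Variable part must fit before we advance. */
			if (sfep->name + sfep->namelen > endp)
				return -1;
			sfep = (const void *)(sfep->name + sfep->namelen);
		}
		/* The walk must consume the buffer exactly. */
		return (const uint8_t *)sfep == endp ? 0 : -1;
	}
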
@@ -2170,7 +2266,8 @@ xfs_attr3_leaf_lookup_int(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
- ASSERT(ichdr.count < args->geo->blksize / 8);
+ if (ichdr.count >= args->geo->blksize / 8)
+ return -EFSCORRUPTED;
/*
* Binary search. (note: small blocks will skip this loop)
@@ -2186,8 +2283,10 @@ xfs_attr3_leaf_lookup_int(
else
break;
}
- ASSERT(probe >= 0 && (!ichdr.count || probe < ichdr.count));
- ASSERT(span <= 4 || be32_to_cpu(entry->hashval) == hashval);
+ if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count)))
+ return -EFSCORRUPTED;
+ if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval))
+ return -EFSCORRUPTED;
/*
* Since we may have duplicate hashval's, find the first matching
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..4da08af5b134 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,10 +48,12 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+ struct xfs_buf **leaf_bp);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
+xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip);
void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
/*
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d56caf037ca0..21be186067a2 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -65,7 +65,7 @@ xfs_attr3_rmt_blocks(
* does CRC, location and bounds checking, the unpacking function checks the
* attribute parameters and owner.
*/
-static bool
+static xfs_failaddr_t
xfs_attr3_rmt_hdr_ok(
void *ptr,
xfs_ino_t ino,
@@ -76,19 +76,19 @@ xfs_attr3_rmt_hdr_ok(
struct xfs_attr3_rmt_hdr *rmt = ptr;
if (bno != be64_to_cpu(rmt->rm_blkno))
- return false;
+ return __this_address;
if (offset != be32_to_cpu(rmt->rm_offset))
- return false;
+ return __this_address;
if (size != be32_to_cpu(rmt->rm_bytes))
- return false;
+ return __this_address;
if (ino != be64_to_cpu(rmt->rm_owner))
- return false;
+ return __this_address;
/* ok */
- return true;
+ return NULL;
}
-static bool
+static xfs_failaddr_t
xfs_attr3_rmt_verify(
struct xfs_mount *mp,
void *ptr,
@@ -98,27 +98,29 @@ xfs_attr3_rmt_verify(
struct xfs_attr3_rmt_hdr *rmt = ptr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
+ return __this_address;
if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
- return false;
+ return __this_address;
if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(rmt->rm_blkno) != bno)
- return false;
+ return __this_address;
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
- return false;
+ return __this_address;
if (be32_to_cpu(rmt->rm_offset) +
be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
- return false;
+ return __this_address;
if (rmt->rm_owner == 0)
- return false;
+ return __this_address;
- return true;
+ return NULL;
}
-static void
-xfs_attr3_rmt_read_verify(
- struct xfs_buf *bp)
+static int
+__xfs_attr3_rmt_read_verify(
+ struct xfs_buf *bp,
+ bool check_crc,
+ xfs_failaddr_t *failaddr)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
char *ptr;
@@ -128,7 +130,7 @@ xfs_attr3_rmt_read_verify(
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
- return;
+ return 0;
ptr = bp->b_addr;
bno = bp->b_bn;
@@ -136,23 +138,48 @@ xfs_attr3_rmt_read_verify(
ASSERT(len >= blksize);
while (len > 0) {
- if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
- xfs_buf_ioerror(bp, -EFSBADCRC);
- break;
- }
- if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- break;
+ if (check_crc &&
+ !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
+ *failaddr = __this_address;
+ return -EFSBADCRC;
}
+ *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
+ if (*failaddr)
+ return -EFSCORRUPTED;
len -= blksize;
ptr += blksize;
bno += BTOBB(blksize);
}
- if (bp->b_error)
- xfs_verifier_error(bp);
- else
- ASSERT(len == 0);
+ if (len != 0) {
+ *failaddr = __this_address;
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
+static void
+xfs_attr3_rmt_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+ int error;
+
+ error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
+ if (error)
+ xfs_verifier_error(bp, error, fa);
+}
+
+static xfs_failaddr_t
+xfs_attr3_rmt_verify_struct(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+ int error;
+
+ error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
+ return error ? fa : NULL;
}
static void
@@ -160,6 +187,7 @@ xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
@@ -177,9 +205,9 @@ xfs_attr3_rmt_write_verify(
while (len > 0) {
struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -188,8 +216,7 @@ xfs_attr3_rmt_write_verify(
* xfs_attr3_rmt_hdr_set() for the explanation.
*/
if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
return;
}
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
@@ -198,13 +225,16 @@ xfs_attr3_rmt_write_verify(
ptr += blksize;
bno += BTOBB(blksize);
}
- ASSERT(len == 0);
+
+ if (len != 0)
+ xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
.name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
+ .verify_struct = xfs_attr3_rmt_verify_struct,
};
STATIC int
@@ -269,7 +299,7 @@ xfs_attr_rmtval_copyout(
byte_cnt = min(*valuelen, byte_cnt);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
- if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,
+ if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 08df809e2315..daae00ed30c5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -400,7 +400,7 @@ xfs_bmap_check_leaf_extents(
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(mp,
- XFS_FSB_SANITY_CHECK(mp, bno), error0);
+ xfs_verify_fsbno(mp, bno), error0);
if (bp_release) {
bp_release = 0;
xfs_trans_brelse(NULL, bp);
@@ -1220,7 +1220,7 @@ xfs_iread_extents(
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(mp,
- XFS_FSB_SANITY_CHECK(mp, bno), out_brelse);
+ xfs_verify_fsbno(mp, bno), out_brelse);
xfs_trans_brelse(tp, bp);
}
@@ -3337,6 +3337,49 @@ xfs_bmap_btalloc_filestreams(
return 0;
}
+/* Update all inode and quota accounting for the allocation we just did. */
+static void
+xfs_bmap_btalloc_accounting(
+ struct xfs_bmalloca *ap,
+ struct xfs_alloc_arg *args)
+{
+ if (ap->flags & XFS_BMAPI_COWFORK) {
+ /*
+ * COW fork blocks are in-core only and thus are treated as
+ * in-core quota reservation (like delalloc blocks) even when
+ * converted to real blocks. The quota reservation is not
+ * accounted to disk until blocks are remapped to the data
+ * fork. So if these blocks were previously delalloc, we
+ * already have quota reservation and there's nothing to do
+ * yet.
+ */
+ if (ap->wasdel)
+ return;
+
+ /*
+ * Otherwise, we've allocated blocks in a hole. The transaction
+ * has acquired in-core quota reservation for this extent.
+ * Rather than account these as real blocks, however, we reduce
+ * the transaction quota reservation based on the allocation.
+ * This essentially transfers the transaction quota reservation
+ * to that of a delalloc extent.
+ */
+ ap->ip->i_delayed_blks += args->len;
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
+ -(long)args->len);
+ return;
+ }
+
+ /* data/attr fork only */
+ ap->ip->i_d.di_nblocks += args->len;
+ xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+ if (ap->wasdel)
+ ap->ip->i_delayed_blks -= args->len;
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+ ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
+ args->len);
+}
+
STATIC int
xfs_bmap_btalloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
@@ -3347,6 +3390,8 @@ xfs_bmap_btalloc(
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t ag;
xfs_alloc_arg_t args;
+ xfs_fileoff_t orig_offset;
+ xfs_extlen_t orig_length;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
int nullfb; /* true if ap->firstblock isn't set */
@@ -3356,6 +3401,8 @@ xfs_bmap_btalloc(
int stripe_align;
ASSERT(ap->length);
+ orig_offset = ap->offset;
+ orig_length = ap->length;
mp = ap->ip->i_mount;
@@ -3571,19 +3618,23 @@ xfs_bmap_btalloc(
*ap->firstblock = args.fsbno;
ASSERT(nullfb || fb_agno <= args.agno);
ap->length = args.len;
- if (!(ap->flags & XFS_BMAPI_COWFORK))
- ap->ip->i_d.di_nblocks += args.len;
- xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
- if (ap->wasdel)
- ap->ip->i_delayed_blks -= args.len;
/*
- * Adjust the disk quota also. This was reserved
- * earlier.
+ * If the extent size hint is active, we tried to round the
+ * caller's allocation request offset down to extsz and the
+ * length up to another extsz boundary. If we found a free
+ * extent we mapped it in starting at this new offset. If the
+ * newly mapped space isn't long enough to cover any of the
+ * range of offsets that was originally requested, move the
+ * mapping up so that we can fill as much of the caller's
+ * original request as possible. Free space is apparently
+ * very fragmented so we're unlikely to be able to satisfy the
+ * hints anyway.
*/
- xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
- ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
- XFS_TRANS_DQ_BCOUNT,
- (long) args.len);
+ if (ap->length <= orig_length)
+ ap->offset = orig_offset;
+ else if (ap->offset + ap->length < orig_offset + orig_length)
+ ap->offset = orig_offset + orig_length - ap->length;
+ xfs_bmap_btalloc_accounting(ap, &args);
} else {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
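
The remap logic above is easiest to check with numbers. Extracted into a standalone sketch (the types and function name are invented; the branch structure mirrors the hunk):

	#include <assert.h>
	#include <stdint.h>

	typedef uint64_t fileoff_t;
	typedef uint32_t extlen_t;

	/* Mirror of the offset fix-up above, isolated for illustration. */
	static fileoff_t
	fixup_offset(fileoff_t off, extlen_t len,
		     fileoff_t orig_off, extlen_t orig_len)
	{
		if (len <= orig_len)
			return orig_off;	/* short: start at the request */
		if (off + len < orig_off + orig_len)
			return orig_off + orig_len - len; /* slide up to cover */
		return off;
	}

	int main(void)
	{
		/* Hint widened [100,110) to [96,128); only 8 blocks found. */
		assert(fixup_offset(96, 8, 100, 10) == 100); /* maps [100,108) */
		/* Full 32-block allocation at 96 already covers [100,110). */
		assert(fixup_offset(96, 32, 100, 10) == 96);
		/* 12 blocks at 90 end too early; slide up to end at 110. */
		assert(fixup_offset(90, 12, 100, 10) == 98); /* maps [98,110) */
		return 0;
	}
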
@@ -3876,8 +3927,6 @@ xfs_bmapi_reserve_delalloc(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_extlen_t alen;
xfs_extlen_t indlen;
- char rt = XFS_IS_REALTIME_INODE(ip);
- xfs_extlen_t extsz;
int error;
xfs_fileoff_t aoff = off;
@@ -3892,31 +3941,25 @@ xfs_bmapi_reserve_delalloc(
prealloc = alen - len;
/* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK)
- extsz = xfs_get_cowextsz_hint(ip);
- else
- extsz = xfs_get_extsz_hint(ip);
- if (extsz) {
+ if (whichfork == XFS_COW_FORK) {
struct xfs_bmbt_irec prev;
+ xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
prev.br_startoff = NULLFILEOFF;
- error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
+ error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
}
- if (rt)
- extsz = alen / mp->m_sb.sb_rextsize;
-
/*
* Make a transaction-less quota reservation for delayed allocation
* blocks. This number gets adjusted later. We return if we haven't
* allocated blocks already inside this loop.
*/
error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
- rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ XFS_QMOPT_RES_REGBLKS);
if (error)
return error;
@@ -3927,12 +3970,7 @@ xfs_bmapi_reserve_delalloc(
indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
- if (rt) {
- error = xfs_mod_frextents(mp, -((int64_t)extsz));
- } else {
- error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
- }
-
+ error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
if (error)
goto out_unreserve_quota;
@@ -3963,14 +4001,11 @@ xfs_bmapi_reserve_delalloc(
return 0;
out_unreserve_blocks:
- if (rt)
- xfs_mod_frextents(mp, extsz);
- else
- xfs_mod_fdblocks(mp, alen, false);
+ xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
- xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
- XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
+ XFS_QMOPT_RES_REGBLKS);
return error;
}
@@ -4304,8 +4339,16 @@ xfs_bmapi_write(
while (bno < end && n < *nmap) {
bool need_alloc = false, wasdelay = false;
- /* in hole or beyoned EOF? */
+ /* in hole or beyond EOF? */
if (eof || bma.got.br_startoff > bno) {
+ /*
+ * CoW fork conversions should /never/ hit EOF or
+ * holes. There should always be something for us
+ * to work on.
+ */
+ ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
+ (flags & XFS_BMAPI_COWFORK)));
+
if (flags & XFS_BMAPI_DELALLOC) {
/*
* For the COW fork we can reasonably get a
@@ -4824,6 +4867,7 @@ xfs_bmap_del_extent_cow(
xfs_iext_insert(ip, icur, &new, state);
break;
}
+ ip->i_delayed_blks -= del->br_blockcount;
}
/*
@@ -5136,7 +5180,7 @@ __xfs_bunmapi(
* blowing out the transaction with a mix of EFIs and reflink
* adjustments.
*/
- if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+ if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
else
max_len = len;
@@ -5662,7 +5706,8 @@ xfs_bmap_collapse_extents(
*done = true;
goto del_cursor;
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
@@ -5767,7 +5812,8 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
}
- XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
+ XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
+ del_cursor);
if (stop_fsb >= got.br_startoff + got.br_blockcount) {
error = -EIO;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index c10aecaaae44..9faf479aba49 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -425,33 +425,29 @@ xfs_bmbt_diff_two_keys(
be64_to_cpu(k2->bmbt.br_startoff);
}
-static bool
+static xfs_failaddr_t
xfs_bmbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_failaddr_t fa;
unsigned int level;
switch (block->bb_magic) {
case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
- return false;
/*
* XXX: need a better way of verifying the owner here. Right now
* just make sure there has been one set.
*/
- if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
- return false;
+ fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+ if (fa)
+ return fa;
/* fall through */
case cpu_to_be32(XFS_BMAP_MAGIC):
break;
default:
- return false;
+ return __this_address;
}
/*
@@ -463,46 +459,39 @@ xfs_bmbt_verify(
*/
level = be16_to_cpu(block->bb_level);
if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
- return false;
- if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.l.bb_leftsib ||
- (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
- !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
- return false;
- if (!block->bb_u.l.bb_rightsib ||
- (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
- !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
- return false;
-
- return true;
+ return __this_address;
+
+ return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
}
static void
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
+ xfs_failaddr_t fa;
+
if (!xfs_btree_lblock_verify_crc(bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_bmbt_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_bmbt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
- if (bp->b_error) {
+ if (bp->b_error)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_verifier_error(bp);
- }
}
static void
xfs_bmbt_write_verify(
struct xfs_buf *bp)
{
- if (!xfs_bmbt_verify(bp)) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_bmbt_verify(bp);
+ if (fa) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
xfs_btree_lblock_calc_crc(bp);
@@ -512,6 +501,7 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
.name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
+ .verify_struct = xfs_bmbt_verify,
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5f33adf8eecb..79ee4a1951d1 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -273,7 +273,7 @@ xfs_btree_lblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
return;
@@ -311,7 +311,7 @@ xfs_btree_sblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
return;
@@ -329,7 +329,7 @@ xfs_btree_sblock_verify_crc(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
- return false;
+ return __this_address;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
@@ -853,7 +853,7 @@ xfs_btree_read_bufl(
xfs_daddr_t d; /* real disk block address */
int error;
- if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+ if (!xfs_verify_fsbno(mp, fsbno))
return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
@@ -4529,6 +4529,51 @@ xfs_btree_change_owner(
&bbcoi);
}
+/* Verify the v5 fields of a long-format btree block. */
+xfs_failaddr_t
+xfs_btree_lblock_v5hdr_verify(
+ struct xfs_buf *bp,
+ uint64_t owner)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return __this_address;
+ if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+ if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
+ return __this_address;
+ if (owner != XFS_RMAP_OWN_UNKNOWN &&
+ be64_to_cpu(block->bb_u.l.bb_owner) != owner)
+ return __this_address;
+ return NULL;
+}
+
+/* Verify a long-format btree block. */
+xfs_failaddr_t
+xfs_btree_lblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return __this_address;
+
+ /* sibling pointer verification */
+ if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
+ !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
+ return __this_address;
+ if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
+ !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
+ return __this_address;
+
+ return NULL;
+}
+
/**
* xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block
@@ -4537,7 +4582,7 @@ xfs_btree_change_owner(
* @max_recs: pointer to the m_*_mxr max records field in the xfs mount
* @pag_max_level: pointer to the per-ag max level field
*/
-bool
+xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
struct xfs_buf *bp)
{
@@ -4546,14 +4591,14 @@ xfs_btree_sblock_v5hdr_verify(
struct xfs_perag *pag = bp->b_pag;
if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
+ return __this_address;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
+ return __this_address;
if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
- return false;
- return true;
+ return __this_address;
+ return NULL;
}
/**
@@ -4562,29 +4607,29 @@ xfs_btree_sblock_v5hdr_verify(
* @bp: buffer containing the btree block
* @max_recs: maximum records allowed in this btree node
*/
-bool
+xfs_failaddr_t
xfs_btree_sblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_agblock_t agno;
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
- return false;
+ return __this_address;
/* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
+ agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
+ if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
+ !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
+ return __this_address;
+ if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
+ !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
+ return __this_address;
- return true;
+ return NULL;
}
/*
@@ -4953,3 +4998,33 @@ xfs_btree_diff_two_ptrs(
return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}
+
+/* If there's an extent, we're done. */
+STATIC int
+xfs_btree_has_record_helper(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+}
+
+/* Is there a record covering a given range of keys? */
+int
+xfs_btree_has_record(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_irec *low,
+ union xfs_btree_irec *high,
+ bool *exists)
+{
+ int error;
+
+ error = xfs_btree_query_range(cur, low, high,
+ &xfs_btree_has_record_helper, NULL);
+ if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ *exists = true;
+ return 0;
+ }
+ *exists = false;
+ return error;
+}
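
xfs_btree_query_range() invokes the helper once per record overlapping the key range and threads the priv pointer through for caller state; returning XFS_BTREE_QUERY_RANGE_ABORT (or an error) from the helper stops the walk, which is how xfs_btree_has_record() learns of existence from the very first hit. The same machinery can carry state, sketched here as a hypothetical record counter (both xfs_*count* names are invented):

	/* Hypothetical: count records overlapping [low, high] via the same API. */
	struct rec_count {
		unsigned long	nr;
	};

	STATIC int
	xfs_count_helper(
		struct xfs_btree_cur	*cur,
		union xfs_btree_rec	*rec,
		void			*priv)
	{
		struct rec_count	*rc = priv;

		rc->nr++;
		return 0;	/* 0 keeps walking; ABORT/error would stop */
	}

	int
	xfs_btree_count_records(
		struct xfs_btree_cur	*cur,
		union xfs_btree_irec	*low,
		union xfs_btree_irec	*high,
		unsigned long		*nr)
	{
		struct rec_count	rc = { 0 };
		int			error;

		error = xfs_btree_query_range(cur, low, high,
				&xfs_count_helper, &rc);
		*nr = rc.nr;
		return error;
	}
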
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b57501c6f71d..50440b5618e8 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -473,10 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
-#define XFS_FSB_SANITY_CHECK(mp,fsb) \
- (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
- XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
-
/*
* Trace hooks. Currently not implemented as they need to be ported
* over to the generic tracing functionality, which is some effort.
@@ -496,8 +492,14 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
-bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
-bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
+ unsigned int max_recs);
+xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp,
+ uint64_t owner);
+xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
+ unsigned int max_recs);
+
uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
unsigned long len);
xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
@@ -545,5 +547,7 @@ void xfs_btree_get_keys(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, union xfs_btree_key *key);
union xfs_btree_key *xfs_btree_high_key_from_key(struct xfs_btree_cur *cur,
union xfs_btree_key *key);
+int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
+ union xfs_btree_irec *high, bool *exists);
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 651611530d2f..ea187b4a7991 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -128,7 +128,7 @@ xfs_da_state_free(xfs_da_state_t *state)
kmem_zone_free(xfs_da_state_zone, state);
}
-static bool
+static xfs_failaddr_t
xfs_da3_node_verify(
struct xfs_buf *bp)
{
@@ -145,24 +145,24 @@ xfs_da3_node_verify(
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
if (ichdr.magic != XFS_DA3_NODE_MAGIC)
- return false;
+ return __this_address;
if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
- return false;
+ return __this_address;
} else {
if (ichdr.magic != XFS_DA_NODE_MAGIC)
- return false;
+ return __this_address;
}
if (ichdr.level == 0)
- return false;
+ return __this_address;
if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
- return false;
+ return __this_address;
if (ichdr.count == 0)
- return false;
+ return __this_address;
/*
* we don't know if the node is for and attribute or directory tree,
@@ -170,11 +170,11 @@ xfs_da3_node_verify(
*/
if (ichdr.count > mp->m_dir_geo->node_ents &&
ichdr.count > mp->m_attr_geo->node_ents)
- return false;
+ return __this_address;
/* XXX: hash order check? */
- return true;
+ return NULL;
}
static void
@@ -182,12 +182,13 @@ xfs_da3_node_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_da3_node_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_da3_node_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -211,19 +212,20 @@ xfs_da3_node_read_verify(
struct xfs_buf *bp)
{
struct xfs_da_blkinfo *info = bp->b_addr;
+ xfs_failaddr_t fa;
switch (be16_to_cpu(info->magic)) {
case XFS_DA3_NODE_MAGIC:
if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
- xfs_buf_ioerror(bp, -EFSBADCRC);
+ xfs_verifier_error(bp, -EFSBADCRC,
+ __this_address);
break;
}
/* fall through */
case XFS_DA_NODE_MAGIC:
- if (!xfs_da3_node_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- break;
- }
+ fa = xfs_da3_node_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
@@ -236,18 +238,40 @@ xfs_da3_node_read_verify(
bp->b_ops->verify_read(bp);
return;
default:
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
break;
}
+}
+
+/* Verify the structure of a da3 block. */
+static xfs_failaddr_t
+xfs_da3_node_verify_struct(
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
- /* corrupt block */
- xfs_verifier_error(bp);
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DA3_NODE_MAGIC:
+ case XFS_DA_NODE_MAGIC:
+ return xfs_da3_node_verify(bp);
+ case XFS_ATTR_LEAF_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
+ bp->b_ops = &xfs_attr3_leaf_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ case XFS_DIR2_LEAFN_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
+ bp->b_ops = &xfs_dir3_leafn_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ default:
+ return __this_address;
+ }
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
.name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
+ .verify_struct = xfs_da3_node_verify_struct,
};
int
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 3771edcb301d..7e77299b7789 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -875,4 +875,10 @@ struct xfs_attr3_rmt_hdr {
((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
sizeof(struct xfs_attr3_rmt_hdr) : 0))
+/* Number of bytes in a directory block. */
+static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
+{
+ return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);
+}
+
#endif /* __XFS_DA_FORMAT_H__ */
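
xfs_dir2_dirblock_bytes() centralises the directory block size computation that several call sites previously open-coded. For example, with 4 KiB filesystem blocks (sb_blocklog = 12) and sb_dirblklog = 2, directory blocks span 1 << (12 + 2) = 16384 bytes, i.e. four filesystem blocks. A usage sketch (the field values are examples only):

	struct xfs_sb	sb = {
		.sb_blocklog	= 12,	/* 4 KiB filesystem blocks */
		.sb_dirblklog	= 2,	/* dir block = 2^2 fs blocks */
	};

	xfs_dir2_dirblock_bytes(&sb);	/* 1 << (12 + 2) = 16384 */
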
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
+ /* Hold the (previously bjoin'd) buffer locked across the roll. */
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+ xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
/* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+ /* Rejoin the buffers and dirty them so the log moves forward. */
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+ xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+ xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+ }
+
return error;
}
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
}
}
+ ASSERT(0);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op. Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+ struct xfs_defer_ops *dop,
+ struct xfs_buf *bp)
+{
+ int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+ if (dop->dop_bufs[i] == bp)
+ return 0;
+ else if (dop->dop_bufs[i] == NULL) {
+ dop->dop_bufs[i] = bp;
+ return 0;
+ }
+ }
+
+ ASSERT(0);
return -EFSCORRUPTED;
}
@@ -493,9 +528,7 @@ xfs_defer_init(
struct xfs_defer_ops *dop,
xfs_fsblock_t *fbp)
{
- dop->dop_committed = false;
- dop->dop_low = false;
- memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+ memset(dop, 0, sizeof(struct xfs_defer_ops));
*fbp = NULLFSBLOCK;
INIT_LIST_HEAD(&dop->dop_intake);
INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
};
#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
struct xfs_defer_ops {
bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
struct list_head dop_intake; /* unlogged pending work */
struct list_head dop_pending; /* logged pending work */
- /* relog these inodes with each roll */
+ /* relog these with each roll */
struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+ struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
};
void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
/* Description of a deferred type. */
struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index e10778c102ea..92f94e190f04 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -119,8 +119,7 @@ xfs_da_mount(
ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
- ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
- XFS_MAX_BLOCKSIZE);
+ ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE);
mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
@@ -140,7 +139,7 @@ xfs_da_mount(
dageo = mp->m_dir_geo;
dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog;
dageo->fsblog = mp->m_sb.sb_blocklog;
- dageo->blksize = 1 << dageo->blklog;
+ dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb);
dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
/*
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 1a8f2cf977ca..388d67c5c903 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -340,5 +340,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
#define XFS_READDIR_BUFSIZE (32768)
unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
+void *xfs_dir3_data_endp(struct xfs_da_geometry *geo,
+ struct xfs_dir2_data_hdr *hdr);
#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 43c902f7a68d..2da86a394bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -58,7 +58,7 @@ xfs_dir_startup(void)
xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2);
}
-static bool
+static xfs_failaddr_t
xfs_dir3_block_verify(
struct xfs_buf *bp)
{
@@ -67,20 +67,18 @@ xfs_dir3_block_verify(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
- return false;
+ return __this_address;
if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
- return false;
+ return __this_address;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
- return false;
+ return __this_address;
}
- if (__xfs_dir3_data_check(NULL, bp))
- return false;
- return true;
+ return __xfs_dir3_data_check(NULL, bp);
}
static void
@@ -88,15 +86,16 @@ xfs_dir3_block_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dir3_block_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_dir3_block_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
@@ -104,12 +103,13 @@ xfs_dir3_block_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_dir3_block_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_dir3_block_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -126,6 +126,7 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
.name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
+ .verify_struct = xfs_dir3_block_verify,
};
int
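/*
 * For reference, the failure-address plumbing these conversions depend
 * on, as defined elsewhere in this series (xfs_types.h/xfs_linux.h):
 * __this_address evaluates to the code address of the failing check, so
 * a corruption report can point at the exact test that tripped.
 */
typedef void *	xfs_failaddr_t;
#define __this_address	({ __label__ __here; __here: barrier(); &&__here; })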
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 8727a43115ef..920279485275 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -36,9 +36,9 @@
/*
* Check the consistency of the data block.
* The input can also be a block-format directory.
- * Return 0 is the buffer is good, otherwise an error.
+ * Return NULL if the buffer is good, otherwise the address of the error.
*/
-int
+xfs_failaddr_t
__xfs_dir3_data_check(
struct xfs_inode *dp, /* incore inode pointer */
struct xfs_buf *bp) /* data block's buffer */
@@ -73,6 +73,14 @@ __xfs_dir3_data_check(
*/
ops = xfs_dir_get_ops(mp, dp);
+ /*
+ * If this isn't a directory, or we don't get handed the dir ops,
+ * something is seriously wrong. Bail out.
+ */
+ if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) ||
+ ops != xfs_dir_get_ops(mp, NULL))
+ return __this_address;
+
hdr = bp->b_addr;
p = (char *)ops->data_entry_p(hdr);
@@ -81,7 +89,6 @@ __xfs_dir3_data_check(
case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
btp = xfs_dir2_block_tail_p(geo, hdr);
lep = xfs_dir2_block_leaf_p(btp);
- endp = (char *)lep;
/*
* The number of leaf entries is limited by the size of the
@@ -90,17 +97,19 @@ __xfs_dir3_data_check(
* so just ensure that the count falls somewhere inside the
* block right now.
*/
- XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
- ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
+ if (be32_to_cpu(btp->count) >=
+ ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry))
+ return __this_address;
break;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
- endp = (char *)hdr + geo->blksize;
break;
default:
- XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
+ return __this_address;
}
+ endp = xfs_dir3_data_endp(geo, hdr);
+ if (!endp)
+ return __this_address;
/*
* Account for zero bestfree entries.
@@ -108,22 +117,25 @@ __xfs_dir3_data_check(
bf = ops->data_bestfree_p(hdr);
count = lastfree = freeseen = 0;
if (!bf[0].length) {
- XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
+ if (bf[0].offset)
+ return __this_address;
freeseen |= 1 << 0;
}
if (!bf[1].length) {
- XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
+ if (bf[1].offset)
+ return __this_address;
freeseen |= 1 << 1;
}
if (!bf[2].length) {
- XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
+ if (bf[2].offset)
+ return __this_address;
freeseen |= 1 << 2;
}
- XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
- be16_to_cpu(bf[1].length));
- XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
- be16_to_cpu(bf[2].length));
+ if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
+ return __this_address;
+ if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
+ return __this_address;
/*
* Loop over the data/unused entries.
*/
@@ -135,22 +147,23 @@ __xfs_dir3_data_check(
* doesn't need to be there.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
- XFS_WANT_CORRUPTED_RETURN(mp, endp >=
- p + be16_to_cpu(dup->length));
- XFS_WANT_CORRUPTED_RETURN(mp,
- be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
- (char *)dup - (char *)hdr);
+ if (lastfree != 0)
+ return __this_address;
+ if (endp < p + be16_to_cpu(dup->length))
+ return __this_address;
+ if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
+ (char *)dup - (char *)hdr)
+ return __this_address;
dfp = xfs_dir2_data_freefind(hdr, bf, dup);
if (dfp) {
i = (int)(dfp - bf);
- XFS_WANT_CORRUPTED_RETURN(mp,
- (freeseen & (1 << i)) == 0);
+ if ((freeseen & (1 << i)) != 0)
+ return __this_address;
freeseen |= 1 << i;
} else {
- XFS_WANT_CORRUPTED_RETURN(mp,
- be16_to_cpu(dup->length) <=
- be16_to_cpu(bf[2].length));
+ if (be16_to_cpu(dup->length) >
+ be16_to_cpu(bf[2].length))
+ return __this_address;
}
p += be16_to_cpu(dup->length);
lastfree = 1;
@@ -163,16 +176,17 @@ __xfs_dir3_data_check(
* The linear search is crude but this is DEBUG code.
*/
dep = (xfs_dir2_data_entry_t *)p;
- XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
- XFS_WANT_CORRUPTED_RETURN(mp,
- !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
- XFS_WANT_CORRUPTED_RETURN(mp, endp >=
- p + ops->data_entsize(dep->namelen));
- XFS_WANT_CORRUPTED_RETURN(mp,
- be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
- (char *)dep - (char *)hdr);
- XFS_WANT_CORRUPTED_RETURN(mp,
- ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
+ if (dep->namelen == 0)
+ return __this_address;
+ if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)))
+ return __this_address;
+ if (endp < p + ops->data_entsize(dep->namelen))
+ return __this_address;
+ if (be16_to_cpu(*ops->data_entry_tag_p(dep)) !=
+ (char *)dep - (char *)hdr)
+ return __this_address;
+ if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX)
+ return __this_address;
count++;
lastfree = 0;
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -188,34 +202,52 @@ __xfs_dir3_data_check(
be32_to_cpu(lep[i].hashval) == hash)
break;
}
- XFS_WANT_CORRUPTED_RETURN(mp,
- i < be32_to_cpu(btp->count));
+ if (i >= be32_to_cpu(btp->count))
+ return __this_address;
}
p += ops->data_entsize(dep->namelen);
}
/*
* Need to have seen all the entries and all the bestfree slots.
*/
- XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
+ if (freeseen != 7)
+ return __this_address;
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
if (lep[i].address ==
cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
- if (i > 0)
- XFS_WANT_CORRUPTED_RETURN(mp,
- be32_to_cpu(lep[i].hashval) >=
- be32_to_cpu(lep[i - 1].hashval));
+ if (i > 0 && be32_to_cpu(lep[i].hashval) <
+ be32_to_cpu(lep[i - 1].hashval))
+ return __this_address;
}
- XFS_WANT_CORRUPTED_RETURN(mp, count ==
- be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
- XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
+ if (count != be32_to_cpu(btp->count) - be32_to_cpu(btp->stale))
+ return __this_address;
+ if (stale != be32_to_cpu(btp->stale))
+ return __this_address;
}
- return 0;
+ return NULL;
+}
+
+#ifdef DEBUG
+void
+xfs_dir3_data_check(
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = __xfs_dir3_data_check(dp, bp);
+ if (!fa)
+ return;
+ xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+ bp->b_addr, __FILE__, __LINE__, fa);
+ ASSERT(0);
}
+#endif
-static bool
+static xfs_failaddr_t
xfs_dir3_data_verify(
struct xfs_buf *bp)
{
@@ -224,20 +256,18 @@ xfs_dir3_data_verify(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
- return false;
+ return __this_address;
if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
- return false;
+ return __this_address;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
- return false;
+ return __this_address;
}
- if (__xfs_dir3_data_check(NULL, bp))
- return false;
- return true;
+ return __xfs_dir3_data_check(NULL, bp);
}
/*
@@ -263,8 +293,7 @@ xfs_dir3_data_reada_verify(
bp->b_ops->verify_read(bp);
return;
default:
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
break;
}
}
@@ -274,15 +303,16 @@ xfs_dir3_data_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dir3_data_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_dir3_data_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
@@ -290,12 +320,13 @@ xfs_dir3_data_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_dir3_data_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_dir3_data_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -312,6 +343,7 @@ const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
.name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
+ .verify_struct = xfs_dir3_data_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
@@ -515,7 +547,6 @@ xfs_dir2_data_freescan_int(
struct xfs_dir2_data_hdr *hdr,
int *loghead)
{
- xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* active data entry */
xfs_dir2_data_unused_t *dup; /* unused data entry */
struct xfs_dir2_data_free *bf;
@@ -537,12 +568,7 @@ xfs_dir2_data_freescan_int(
* Set up pointers.
*/
p = (char *)ops->data_entry_p(hdr);
- if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
- btp = xfs_dir2_block_tail_p(geo, hdr);
- endp = (char *)xfs_dir2_block_leaf_p(btp);
- } else
- endp = (char *)hdr + geo->blksize;
+ endp = xfs_dir3_data_endp(geo, hdr);
/*
* Loop over the block's entries.
*/
@@ -755,17 +781,9 @@ xfs_dir2_data_make_free(
/*
* Figure out where the end of the data area is.
*/
- if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
- endptr = (char *)hdr + args->geo->blksize;
- else {
- xfs_dir2_block_tail_t *btp; /* block tail */
+ endptr = xfs_dir3_data_endp(args->geo, hdr);
+ ASSERT(endptr != NULL);
- ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
- btp = xfs_dir2_block_tail_p(args->geo, hdr);
- endptr = (char *)xfs_dir2_block_leaf_p(btp);
- }
/*
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
@@ -1067,3 +1085,21 @@ xfs_dir2_data_use_free(
}
*needscanp = needscan;
}
+
+/* Find the end of the entry data in a data/block format dir block. */
+void *
+xfs_dir3_data_endp(
+ struct xfs_da_geometry *geo,
+ struct xfs_dir2_data_hdr *hdr)
+{
+ switch (hdr->magic) {
+ case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
+ case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
+ return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
+ case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
+ case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
+ return (char *)hdr + geo->blksize;
+ default:
+ return NULL;
+ }
+}
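/*
 * Typical caller pattern for xfs_dir3_data_endp(), matching the walkers
 * converted above (a sketch; p/endp/ops/geo/hdr follow the surrounding
 * code's naming):
 */
	p = (char *)ops->data_entry_p(hdr);
	endp = xfs_dir3_data_endp(geo, hdr);
	if (!endp)
		return __this_address;	/* unrecognized magic number */
	while (p < endp) {
		/* check one data or unused entry, then advance p */
	}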
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 27297a689d9c..d7e630f41f9c 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -50,13 +50,7 @@ static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
* Pop an assert if something is wrong.
*/
#ifdef DEBUG
-#define xfs_dir3_leaf_check(dp, bp) \
-do { \
- if (!xfs_dir3_leaf1_check((dp), (bp))) \
- ASSERT(0); \
-} while (0);
-
-STATIC bool
+static xfs_failaddr_t
xfs_dir3_leaf1_check(
struct xfs_inode *dp,
struct xfs_buf *bp)
@@ -69,17 +63,32 @@ xfs_dir3_leaf1_check(
if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
- return false;
+ return __this_address;
} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
- return false;
+ return __this_address;
return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
}
+
+static inline void
+xfs_dir3_leaf_check(
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_dir3_leaf1_check(dp, bp);
+ if (!fa)
+ return;
+ xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+ bp->b_addr, __FILE__, __LINE__, fa);
+ ASSERT(0);
+}
#else
#define xfs_dir3_leaf_check(dp, bp)
#endif
-bool
+xfs_failaddr_t
xfs_dir3_leaf_check_int(
struct xfs_mount *mp,
struct xfs_inode *dp,
@@ -114,27 +123,27 @@ xfs_dir3_leaf_check_int(
* We can deduce a value for that from di_size.
*/
if (hdr->count > ops->leaf_max_ents(geo))
- return false;
+ return __this_address;
/* Leaves and bests don't overlap in leaf format. */
if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
(char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
- return false;
+ return __this_address;
/* Check hash value order, count stale entries. */
for (i = stale = 0; i < hdr->count; i++) {
if (i + 1 < hdr->count) {
if (be32_to_cpu(ents[i].hashval) >
be32_to_cpu(ents[i + 1].hashval))
- return false;
+ return __this_address;
}
if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
if (hdr->stale != stale)
- return false;
- return true;
+ return __this_address;
+ return NULL;
}
/*
@@ -142,7 +151,7 @@ xfs_dir3_leaf_check_int(
* kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
* to incorrect magic numbers.
*/
-static bool
+static xfs_failaddr_t
xfs_dir3_leaf_verify(
struct xfs_buf *bp,
uint16_t magic)
@@ -160,16 +169,16 @@ xfs_dir3_leaf_verify(
: XFS_DIR3_LEAFN_MAGIC;
if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
- return false;
+ return __this_address;
if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
- return false;
+ return __this_address;
} else {
if (leaf->hdr.info.magic != cpu_to_be16(magic))
- return false;
+ return __this_address;
}
return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
@@ -181,15 +190,16 @@ __read_verify(
uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dir3_leaf_verify(bp, magic))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_dir3_leaf_verify(bp, magic);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
@@ -198,12 +208,13 @@ __write_verify(
uint16_t magic)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_dir3_leaf_verify(bp, magic)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_dir3_leaf_verify(bp, magic);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -216,6 +227,13 @@ __write_verify(
xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
}
+static xfs_failaddr_t
+xfs_dir3_leaf1_verify(
+ struct xfs_buf *bp)
+{
+ return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAF1_MAGIC);
+}
+
static void
xfs_dir3_leaf1_read_verify(
struct xfs_buf *bp)
@@ -230,6 +248,13 @@ xfs_dir3_leaf1_write_verify(
__write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
}
+static xfs_failaddr_t
+xfs_dir3_leafn_verify(
+ struct xfs_buf *bp)
+{
+ return xfs_dir3_leaf_verify(bp, XFS_DIR2_LEAFN_MAGIC);
+}
+
static void
xfs_dir3_leafn_read_verify(
struct xfs_buf *bp)
@@ -248,12 +273,14 @@ const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
.name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
+ .verify_struct = xfs_dir3_leaf1_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
.name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
+ .verify_struct = xfs_dir3_leafn_verify,
};
int
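/*
 * The ->verify_struct hooks being added throughout this patch exist so
 * online scrub can re-validate an already-read buffer without injecting
 * an I/O error. A sketch of that consumer, per the scrub/common.c
 * changes in this series (details hedged):
 */
	xfs_failaddr_t	fa;

	if (!bp->b_ops || !bp->b_ops->verify_struct)
		return;			/* nothing to recheck */
	fa = bp->b_ops->verify_struct(bp);
	if (fa)
		xfs_scrub_block_set_corrupt(sc, bp);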
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 682e2bf370c7..239d97a64296 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -53,13 +53,7 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
* Check internal consistency of a leafn block.
*/
#ifdef DEBUG
-#define xfs_dir3_leaf_check(dp, bp) \
-do { \
- if (!xfs_dir3_leafn_check((dp), (bp))) \
- ASSERT(0); \
-} while (0);
-
-static bool
+static xfs_failaddr_t
xfs_dir3_leafn_check(
struct xfs_inode *dp,
struct xfs_buf *bp)
@@ -72,17 +66,32 @@ xfs_dir3_leafn_check(
if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
- return false;
+ return __this_address;
} else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
- return false;
+ return __this_address;
return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
}
+
+static inline void
+xfs_dir3_leaf_check(
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_dir3_leafn_check(dp, bp);
+ if (!fa)
+ return;
+ xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, dp->i_mount,
+ bp->b_addr, __FILE__, __LINE__, fa);
+ ASSERT(0);
+}
#else
#define xfs_dir3_leaf_check(dp, bp)
#endif
-static bool
+static xfs_failaddr_t
xfs_dir3_free_verify(
struct xfs_buf *bp)
{
@@ -93,21 +102,21 @@ xfs_dir3_free_verify(
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
- return false;
+ return __this_address;
if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
- return false;
+ return __this_address;
} else {
if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
- return false;
+ return __this_address;
}
/* XXX: should bounds check the xfs_dir3_icfree_hdr here */
- return true;
+ return NULL;
}
static void
@@ -115,15 +124,16 @@ xfs_dir3_free_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dir3_free_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_dir3_free_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
@@ -131,12 +141,13 @@ xfs_dir3_free_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ xfs_failaddr_t fa;
- if (!xfs_dir3_free_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_dir3_free_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -153,10 +164,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
.name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
+ .verify_struct = xfs_dir3_free_verify,
};
/* Everything ok in the free block header? */
-static bool
+static xfs_failaddr_t
xfs_dir3_free_header_check(
struct xfs_inode *dp,
xfs_dablk_t fbno,
@@ -174,22 +186,22 @@ xfs_dir3_free_header_check(
struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
if (be32_to_cpu(hdr3->firstdb) != firstdb)
- return false;
+ return __this_address;
if (be32_to_cpu(hdr3->nvalid) > maxbests)
- return false;
+ return __this_address;
if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
- return false;
+ return __this_address;
} else {
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
if (be32_to_cpu(hdr->firstdb) != firstdb)
- return false;
+ return __this_address;
if (be32_to_cpu(hdr->nvalid) > maxbests)
- return false;
+ return __this_address;
if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
- return false;
+ return __this_address;
}
- return true;
+ return NULL;
}
static int
@@ -200,6 +212,7 @@ __xfs_dir3_free_read(
xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
@@ -208,9 +221,9 @@ __xfs_dir3_free_read(
return err;
/* Check things that we can't do in the verifier. */
- if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
- xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
- xfs_verifier_error(*bpp);
+ fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+ if (fa) {
+ xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
xfs_trans_brelse(tp, *bpp);
return -EFSCORRUPTED;
}
@@ -1906,7 +1919,7 @@ xfs_dir2_node_addname_int(
(unsigned long long)ifbno, lastfbno);
if (fblk) {
xfs_alert(mp,
- " fblk 0x%p blkno %llu index %d magic 0x%x",
+ " fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
fblk,
(unsigned long long)fblk->blkno,
fblk->index,
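/*
 * PTR_FMT replaces bare "%p" now that the kernel hashes printed pointer
 * values; per the xfs_linux.h change in this series, debug builds print
 * the raw address:
 */
#if defined(DEBUG) || defined(XFS_WARN)
#define PTR_FMT "%px"	/* raw pointer, for debugging */
#else
#define PTR_FMT "%p"	/* hashed pointer */
#endif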
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 4badd26c47e6..753aeeeffc18 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -39,12 +39,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
/* xfs_dir2_data.c */
#ifdef DEBUG
-#define xfs_dir3_data_check(dp,bp) __xfs_dir3_data_check(dp, bp);
+extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
#else
#define xfs_dir3_data_check(dp,bp)
#endif
-extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
+extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
+ struct xfs_buf *bp);
extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
@@ -89,8 +90,9 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
int lowstale, int highstale, int *lfloglow, int *lfloghigh);
extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
- struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
+extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp,
+ struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr,
+ struct xfs_dir2_leaf *leaf);
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
@@ -127,7 +129,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
+extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index be8b9755f66a..0c75a7f00883 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -156,7 +156,6 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
{
xfs_dir2_data_hdr_t *hdr; /* block header */
- xfs_dir2_block_tail_t *btp; /* block tail pointer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused data pointer */
@@ -192,9 +191,8 @@ xfs_dir2_block_to_sf(
/*
* Set up to loop over the block's entries.
*/
- btp = xfs_dir2_block_tail_p(args->geo, hdr);
ptr = (char *)dp->d_ops->data_entry_p(hdr);
- endptr = (char *)xfs_dir2_block_leaf_p(btp);
+ endptr = xfs_dir3_data_endp(args->geo, hdr);
sfep = xfs_dir2_sf_firstentry(sfp);
/*
* Loop over the active and unused entries.
@@ -630,7 +628,7 @@ xfs_dir2_sf_check(
#endif /* DEBUG */
/* Verify the consistency of an inline directory. */
-int
+xfs_failaddr_t
xfs_dir2_sf_verify(
struct xfs_inode *ip)
{
@@ -665,7 +663,7 @@ xfs_dir2_sf_verify(
*/
if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
+ return __this_address;
endp = (char *)sfp + size;
@@ -674,7 +672,7 @@ xfs_dir2_sf_verify(
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
error = xfs_dir_ino_validate(mp, ino);
if (error)
- return error;
+ return __this_address;
offset = dops->data_first_offset;
/* Check all reported entries */
@@ -686,11 +684,11 @@ xfs_dir2_sf_verify(
* within the data buffer.
*/
if (((char *)sfep + sizeof(*sfep)) >= endp)
- return -EFSCORRUPTED;
+ return __this_address;
/* Don't allow names with known bad length. */
if (sfep->namelen == 0)
- return -EFSCORRUPTED;
+ return __this_address;
/*
* Check that the variable-length part of the structure is
@@ -699,23 +697,23 @@ xfs_dir2_sf_verify(
*/
next_sfep = dops->sf_nextentry(sfp, sfep);
if (endp < (char *)next_sfep)
- return -EFSCORRUPTED;
+ return __this_address;
/* Check that the offsets always increase. */
if (xfs_dir2_sf_get_offset(sfep) < offset)
- return -EFSCORRUPTED;
+ return __this_address;
/* Check the inode number. */
ino = dops->sf_get_ino(sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
error = xfs_dir_ino_validate(mp, ino);
if (error)
- return error;
+ return __this_address;
/* Check the file type. */
filetype = dops->sf_get_ftype(sfep);
if (filetype >= XFS_DIR3_FT_MAX)
- return -EFSCORRUPTED;
+ return __this_address;
offset = xfs_dir2_sf_get_offset(sfep) +
dops->data_entsize(sfep->namelen);
@@ -723,16 +721,16 @@ xfs_dir2_sf_verify(
sfep = next_sfep;
}
if (i8count != sfp->i8count)
- return -EFSCORRUPTED;
+ return __this_address;
if ((void *)sfep != (void *)endp)
- return -EFSCORRUPTED;
+ return __this_address;
/* Make sure this whole thing ought to be in local format. */
if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
(uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
- return -EFSCORRUPTED;
+ return __this_address;
- return 0;
+ return NULL;
}
/*
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 747085b4ef44..8b7a6c3cb599 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -42,18 +42,14 @@ xfs_calc_dquots_per_chunk(
/*
* Do some primitive error checking on ondisk dquot data structures.
*/
-int
-xfs_dqcheck(
+xfs_failaddr_t
+xfs_dquot_verify(
struct xfs_mount *mp,
xfs_disk_dquot_t *ddq,
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
- uint flags,
- const char *str)
+ uint flags)
{
- xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
- int errs = 0;
-
/*
* We can encounter an uninitialized dquot buffer for 2 reasons:
* 1. If we crash while deleting the quotainode(s), and those blks got
@@ -69,87 +65,57 @@ xfs_dqcheck(
* This is all fine; things are still consistent, and we haven't lost
* any quota information. Just don't complain about bad dquot blks.
*/
- if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
- str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
- errs++;
- }
- if (ddq->d_version != XFS_DQUOT_VERSION) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
- str, id, ddq->d_version, XFS_DQUOT_VERSION);
- errs++;
- }
+ if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC))
+ return __this_address;
+ if (ddq->d_version != XFS_DQUOT_VERSION)
+ return __this_address;
if (ddq->d_flags != XFS_DQ_USER &&
ddq->d_flags != XFS_DQ_PROJ &&
- ddq->d_flags != XFS_DQ_GROUP) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
- str, id, ddq->d_flags);
- errs++;
- }
+ ddq->d_flags != XFS_DQ_GROUP)
+ return __this_address;
- if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : ondisk-dquot 0x%p, ID mismatch: "
- "0x%x expected, found id 0x%x",
- str, ddq, id, be32_to_cpu(ddq->d_id));
- errs++;
- }
+ if (id != -1 && id != be32_to_cpu(ddq->d_id))
+ return __this_address;
- if (!errs && ddq->d_id) {
- if (ddq->d_blk_softlimit &&
- be64_to_cpu(ddq->d_bcount) >
- be64_to_cpu(ddq->d_blk_softlimit)) {
- if (!ddq->d_btimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- if (ddq->d_ino_softlimit &&
- be64_to_cpu(ddq->d_icount) >
- be64_to_cpu(ddq->d_ino_softlimit)) {
- if (!ddq->d_itimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- if (ddq->d_rtb_softlimit &&
- be64_to_cpu(ddq->d_rtbcount) >
- be64_to_cpu(ddq->d_rtb_softlimit)) {
- if (!ddq->d_rtbtimer) {
- if (flags & XFS_QMOPT_DOWARN)
- xfs_alert(mp,
- "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
- str, (int)be32_to_cpu(ddq->d_id), ddq);
- errs++;
- }
- }
- }
+ if (!ddq->d_id)
+ return NULL;
+
+ if (ddq->d_blk_softlimit &&
+ be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit) &&
+ !ddq->d_btimer)
+ return __this_address;
+
+ if (ddq->d_ino_softlimit &&
+ be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit) &&
+ !ddq->d_itimer)
+ return __this_address;
- if (!errs || !(flags & XFS_QMOPT_DQREPAIR))
- return errs;
+ if (ddq->d_rtb_softlimit &&
+ be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit) &&
+ !ddq->d_rtbtimer)
+ return __this_address;
+
+ return NULL;
+}
+
+/*
+ * Do some primitive repair on an ondisk dquot: reinitialize it in place.
+ */
+int
+xfs_dquot_repair(
+ struct xfs_mount *mp,
+ struct xfs_disk_dquot *ddq,
+ xfs_dqid_t id,
+ uint type)
+{
+ struct xfs_dqblk *d = (struct xfs_dqblk *)ddq;
- if (flags & XFS_QMOPT_DOWARN)
- xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
/*
* Typically, a repair is only requested by quotacheck.
*/
ASSERT(id != -1);
- ASSERT(flags & XFS_QMOPT_DQREPAIR);
memset(d, 0, sizeof(xfs_dqblk_t));
d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -163,7 +129,7 @@ xfs_dqcheck(
XFS_DQUOT_CRC_OFF);
}
- return errs;
+ return 0;
}
STATIC bool
@@ -198,13 +164,13 @@ xfs_dquot_buf_verify_crc(
return true;
}
-STATIC bool
+STATIC xfs_failaddr_t
xfs_dquot_buf_verify(
struct xfs_mount *mp,
- struct xfs_buf *bp,
- int warn)
+ struct xfs_buf *bp)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
+ xfs_failaddr_t fa;
xfs_dqid_t id = 0;
int ndquots;
int i;
@@ -228,33 +194,43 @@ xfs_dquot_buf_verify(
*/
for (i = 0; i < ndquots; i++) {
struct xfs_disk_dquot *ddq;
- int error;
ddq = &d[i].dd_diskdq;
if (i == 0)
id = be32_to_cpu(ddq->d_id);
- error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
- if (error)
- return false;
+ fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0);
+ if (fa)
+ return fa;
}
- return true;
+
+ return NULL;
+}
+
+static xfs_failaddr_t
+xfs_dquot_buf_verify_struct(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ return xfs_dquot_buf_verify(mp, bp);
}
static void
xfs_dquot_buf_read_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (!xfs_dquot_buf_verify_crc(mp, bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_dquot_buf_verify(mp, bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
/*
@@ -270,7 +246,7 @@ xfs_dquot_buf_readahead_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp) ||
- !xfs_dquot_buf_verify(mp, bp, 0)) {
+ xfs_dquot_buf_verify(mp, bp) != NULL) {
xfs_buf_ioerror(bp, -EIO);
bp->b_flags &= ~XBF_DONE;
}
@@ -283,21 +259,21 @@ xfs_dquot_buf_readahead_verify(
*/
static void
xfs_dquot_buf_write_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
- if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
- return;
- }
+ fa = xfs_dquot_buf_verify(mp, bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
.name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
+ .verify_struct = xfs_dquot_buf_verify_struct,
};
const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b90924104596..faf1a4edd618 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -233,6 +233,13 @@ typedef struct xfs_fsop_resblks {
#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
+/*
+ * Limits on sb_agblocks/sb_agblklog -- mkfs won't format AGs smaller than
+ * 16MB or larger than 1TB.
+ */
+#define XFS_MIN_AG_BYTES (1ULL << 24) /* 16 MB */
+#define XFS_MAX_AG_BYTES (1ULL << 40) /* 1 TB */
+
/* keep the maximum size under 2^31 by a small amount */
#define XFS_MAX_LOG_BYTES \
((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
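/*
 * Expressed in filesystem blocks for a 4 KiB block size (blocklog 12):
 *
 *	XFS_MIN_AG_BYTES >> 12 ==       4096 blocks (16 MB)
 *	XFS_MAX_AG_BYTES >> 12 ==  268435456 blocks (1 TB)
 */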
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de3f04a98656..0e2cf5f0be1f 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
xfs_ialloc_ag_select(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t parent, /* parent directory inode number */
- umode_t mode, /* bits set to indicate file type */
- int okalloc) /* ok to allocate more space */
+ umode_t mode) /* bits set to indicate file type */
{
xfs_agnumber_t agcount; /* number of ag's in the filesystem */
xfs_agnumber_t agno; /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
return agno;
}
- if (!okalloc)
- goto nextag;
-
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, tp, agno, flags);
if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
struct xfs_trans *tp,
xfs_ino_t parent,
umode_t mode,
- int okalloc,
struct xfs_buf **IO_agbp,
xfs_ino_t *inop)
{
@@ -1692,6 +1687,7 @@ xfs_dialloc(
int noroom = 0;
xfs_agnumber_t start_agno;
struct xfs_perag *pag;
+ int okalloc = 1;
if (*IO_agbp) {
/*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
* We do not have an agbp, so select an initial allocation
* group for inode allocation.
*/
- start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+ start_agno = xfs_ialloc_ag_select(tp, parent, mode);
if (start_agno == NULLAGNUMBER) {
*inop = NULLFSINO;
return 0;
@@ -2495,7 +2491,7 @@ xfs_check_agi_unlinked(
#define xfs_check_agi_unlinked(agi)
#endif
-static bool
+static xfs_failaddr_t
xfs_agi_verify(
struct xfs_buf *bp)
{
@@ -2504,28 +2500,28 @@ xfs_agi_verify(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp,
be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
- return false;
+ return __this_address;
}
/*
* Validate the magic number of the agi block.
*/
if (agi->agi_magicnum != cpu_to_be32(XFS_AGI_MAGIC))
- return false;
+ return __this_address;
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
- return false;
+ return __this_address;
if (be32_to_cpu(agi->agi_level) < 1 ||
be32_to_cpu(agi->agi_level) > XFS_BTREE_MAXLEVELS)
- return false;
+ return __this_address;
if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
(be32_to_cpu(agi->agi_free_level) < 1 ||
be32_to_cpu(agi->agi_free_level) > XFS_BTREE_MAXLEVELS))
- return false;
+ return __this_address;
/*
* during growfs operations, the perag is not fully initialised,
@@ -2534,10 +2530,10 @@ xfs_agi_verify(
* so we can detect and avoid this problem.
*/
if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
- return false;
+ return __this_address;
xfs_check_agi_unlinked(agi);
- return true;
+ return NULL;
}
static void
@@ -2545,28 +2541,29 @@ xfs_agi_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
- XFS_ERRTAG_IALLOC_READ_AGI))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_agi_verify(bp);
+ if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
xfs_agi_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
- if (!xfs_agi_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_agi_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -2582,6 +2579,7 @@ const struct xfs_buf_ops xfs_agi_buf_ops = {
.name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
+ .verify_struct = xfs_agi_verify,
};
/*
@@ -2755,3 +2753,102 @@ xfs_verify_dir_ino(
return false;
return xfs_verify_ino(mp, ino);
}
+
+/* Is there an inode record covering a given range of inode numbers? */
+int
+xfs_ialloc_has_inode_record(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t low,
+ xfs_agino_t high,
+ bool *exists)
+{
+ struct xfs_inobt_rec_incore irec;
+ xfs_agino_t agino;
+ uint16_t holemask;
+ int has_record;
+ int i;
+ int error;
+
+ *exists = false;
+ error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
+ while (error == 0 && has_record) {
+ error = xfs_inobt_get_rec(cur, &irec, &has_record);
+ if (error || irec.ir_startino > high)
+ break;
+
+ agino = irec.ir_startino;
+ holemask = irec.ir_holemask;
+ for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; holemask >>= 1,
+ i++, agino += XFS_INODES_PER_HOLEMASK_BIT) {
+ if (holemask & 1)
+ continue;
+ if (agino + XFS_INODES_PER_HOLEMASK_BIT > low &&
+ agino <= high) {
+ *exists = true;
+ return 0;
+ }
+ }
+
+ error = xfs_btree_increment(cur, 0, &has_record);
+ }
+ return error;
+}
+
+/* Is there an inode record covering a given extent? */
+int
+xfs_ialloc_has_inodes_at_extent(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ xfs_agino_t low;
+ xfs_agino_t high;
+
+ low = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno, 0);
+ high = XFS_OFFBNO_TO_AGINO(cur->bc_mp, bno + len, 0) - 1;
+
+ return xfs_ialloc_has_inode_record(cur, low, high, exists);
+}
+
+struct xfs_ialloc_count_inodes {
+ xfs_agino_t count;
+ xfs_agino_t freecount;
+};
+
+/* Record inode counts across all inobt records. */
+STATIC int
+xfs_ialloc_count_inodes_rec(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_inobt_rec_incore irec;
+ struct xfs_ialloc_count_inodes *ci = priv;
+
+ xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
+ ci->count += irec.ir_count;
+ ci->freecount += irec.ir_freecount;
+
+ return 0;
+}
+
+/* Count allocated and free inodes under an inobt. */
+int
+xfs_ialloc_count_inodes(
+ struct xfs_btree_cur *cur,
+ xfs_agino_t *count,
+ xfs_agino_t *freecount)
+{
+ struct xfs_ialloc_count_inodes ci = {0};
+ int error;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
+ error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
+ if (error)
+ return error;
+
+ *count = ci.count;
+ *freecount = ci.freecount;
+ return 0;
+}
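/*
 * A sketch of the intended consumer: the AGI scrubber elsewhere in this
 * series cross-references these totals against the AGI header (the
 * variable setup here is illustrative):
 */
	xfs_agino_t	count;
	xfs_agino_t	freecount;
	int		error;

	error = xfs_ialloc_count_inodes(cur, &count, &freecount);
	if (!error &&
	    (count != be32_to_cpu(agi->agi_count) ||
	     freecount != be32_to_cpu(agi->agi_freecount))) {
		/* flag the AGI as corrupt */
	}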
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index d2bdcd5e7312..c5402bb4ce0c 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -81,7 +81,6 @@ xfs_dialloc(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t parent, /* parent inode (directory) */
umode_t mode, /* mode bits for new inode */
- int okalloc, /* ok to allocate more space */
struct xfs_buf **agbp, /* buf for a.g. inode header */
xfs_ino_t *inop); /* inode number allocated */
@@ -171,6 +170,12 @@ int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
union xfs_btree_rec;
void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
struct xfs_inobt_rec_incore *irec);
+int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+int xfs_ialloc_has_inode_record(struct xfs_btree_cur *cur, xfs_agino_t low,
+ xfs_agino_t high, bool *exists);
+int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
+ xfs_agino_t *freecount);
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 317caba9faa6..af197a5f3a82 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -141,21 +141,42 @@ xfs_finobt_alloc_block(
union xfs_btree_ptr *new,
int *stat)
{
+ if (cur->bc_mp->m_inotbt_nores)
+ return xfs_inobt_alloc_block(cur, start, new, stat);
return __xfs_inobt_alloc_block(cur, start, new, stat,
XFS_AG_RESV_METADATA);
}
STATIC int
-xfs_inobt_free_block(
+__xfs_inobt_free_block(
struct xfs_btree_cur *cur,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ enum xfs_ag_resv_type resv)
{
struct xfs_owner_info oinfo;
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
return xfs_free_extent(cur->bc_tp,
XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
- &oinfo, XFS_AG_RESV_NONE);
+ &oinfo, resv);
+}
+
+STATIC int
+xfs_inobt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE);
+}
+
+STATIC int
+xfs_finobt_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ if (cur->bc_mp->m_inotbt_nores)
+ return xfs_inobt_free_block(cur, bp);
+ return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA);
}
STATIC int
@@ -250,12 +271,13 @@ xfs_inobt_diff_two_keys(
be32_to_cpu(k2->inobt.ir_startino);
}
-static int
+static xfs_failaddr_t
xfs_inobt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_failaddr_t fa;
unsigned int level;
/*
@@ -271,20 +293,21 @@ xfs_inobt_verify(
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
- if (!xfs_btree_sblock_v5hdr_verify(bp))
- return false;
+ fa = xfs_btree_sblock_v5hdr_verify(bp);
+ if (fa)
+ return fa;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
case cpu_to_be32(XFS_FIBT_MAGIC):
break;
default:
- return 0;
+ return NULL;
}
/* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
- return false;
+ return __this_address;
return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
@@ -293,25 +316,30 @@ static void
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
+ xfs_failaddr_t fa;
+
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_inobt_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_inobt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
- if (bp->b_error) {
+ if (bp->b_error)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_verifier_error(bp);
- }
}
static void
xfs_inobt_write_verify(
struct xfs_buf *bp)
{
- if (!xfs_inobt_verify(bp)) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_inobt_verify(bp);
+ if (fa) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
xfs_btree_sblock_calc_crc(bp);
@@ -322,6 +350,7 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
.name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
+ .verify_struct = xfs_inobt_verify,
};
STATIC int
@@ -372,7 +401,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_finobt_set_root,
.alloc_block = xfs_finobt_alloc_block,
- .free_block = xfs_inobt_free_block,
+ .free_block = xfs_finobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
struct xfs_iext_leaf *new = NULL;
int nr_entries, i;
- trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
if (ifp->if_height == 0)
xfs_iext_alloc_root(ifp, cur);
else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
xfs_iext_set(cur_rec(cur), irec);
ifp->if_bytes += sizeof(struct xfs_iext_rec);
+ trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
if (new)
xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 6b7989038d75..4fe17b368316 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -32,6 +32,8 @@
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
+#include <linux/iversion.h>
+
/*
* Check that none of the inode's in the buffer have a next
* unlinked field of 0.
@@ -113,8 +115,7 @@ xfs_inode_buf_verify(
return;
}
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -264,7 +265,8 @@ xfs_inode_from_disk(
to->di_flags = be16_to_cpu(from->di_flags);
if (to->di_version == 3) {
- inode->i_version = be64_to_cpu(from->di_changecount);
+ inode_set_iversion_queried(inode,
+ be64_to_cpu(from->di_changecount));
to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
to->di_flags2 = be64_to_cpu(from->di_flags2);
@@ -314,7 +316,7 @@ xfs_inode_to_disk(
to->di_flags = cpu_to_be16(from->di_flags);
if (from->di_version == 3) {
- to->di_changecount = cpu_to_be64(inode->i_version);
+ to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
to->di_flags2 = cpu_to_be64(from->di_flags2);
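/*
 * For reference, the <linux/iversion.h> helpers used above (simplified;
 * signatures quoted from memory): the "queried" store marks the value as
 * already observed, so the next in-memory change is guaranteed to bump
 * the counter, while the "peek" read does not mark it observed.
 */
static inline void inode_set_iversion_queried(struct inode *inode, u64 val);
static inline u64 inode_peek_iversion(const struct inode *inode);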
@@ -381,7 +383,7 @@ xfs_log_dinode_to_disk(
}
}
-bool
+xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
xfs_ino_t ino,
@@ -390,53 +392,122 @@ xfs_dinode_verify(
uint16_t mode;
uint16_t flags;
uint64_t flags2;
+ uint64_t di_size;
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
- return false;
+ return __this_address;
+
+ /* Verify v3 integrity information first */
+ if (dip->di_version >= 3) {
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return __this_address;
+ if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+ XFS_DINODE_CRC_OFF))
+ return __this_address;
+ if (be64_to_cpu(dip->di_ino) != ino)
+ return __this_address;
+ if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+ }
/* don't allow invalid i_size */
- if (be64_to_cpu(dip->di_size) & (1ULL << 63))
- return false;
+ di_size = be64_to_cpu(dip->di_size);
+ if (di_size & (1ULL << 63))
+ return __this_address;
mode = be16_to_cpu(dip->di_mode);
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
- return false;
+ return __this_address;
/* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
- return false;
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
+ return __this_address;
+
+ /* Fork checks carried over from xfs_iformat_fork */
+ if (mode &&
+ be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
+ be64_to_cpu(dip->di_nblocks))
+ return __this_address;
+
+ if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
+ return __this_address;
+
+ flags = be16_to_cpu(dip->di_flags);
+
+ if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
+ return __this_address;
+
+ /* Do we have appropriate data fork formats for the mode? */
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFSOCK:
+ if (dip->di_format != XFS_DINODE_FMT_DEV)
+ return __this_address;
+ break;
+ case S_IFREG:
+ case S_IFLNK:
+ case S_IFDIR:
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_LOCAL:
+ /*
+ * no local regular files yet
+ */
+ if (S_ISREG(mode))
+ return __this_address;
+ if (di_size > XFS_DFORK_DSIZE(dip, mp))
+ return __this_address;
+ /* fall through */
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ break;
+ default:
+ return __this_address;
+ }
+ break;
+ case 0:
+ /* Uninitialized inode ok. */
+ break;
+ default:
+ return __this_address;
+ }
+
+ if (XFS_DFORK_Q(dip)) {
+ switch (dip->di_aformat) {
+ case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_EXTENTS:
+ case XFS_DINODE_FMT_BTREE:
+ break;
+ default:
+ return __this_address;
+ }
+ }
/* only version 3 or greater inodes are extensively verified here */
if (dip->di_version < 3)
- return true;
-
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
- XFS_DINODE_CRC_OFF))
- return false;
- if (be64_to_cpu(dip->di_ino) != ino)
- return false;
- if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return NULL;
- flags = be16_to_cpu(dip->di_flags);
flags2 = be64_to_cpu(dip->di_flags2);
/* don't allow reflink/cowextsize if we don't have reflink */
if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
!xfs_sb_version_hasreflink(&mp->m_sb))
- return false;
+ return __this_address;
+
+ /* only regular files get reflink */
+ if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
+ return __this_address;
/* don't let reflink and realtime mix */
if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
- return false;
+ return __this_address;
/* don't let reflink and dax mix */
if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
- return false;
+ return __this_address;
- return true;
+ return NULL;
}
void
@@ -476,6 +547,7 @@ xfs_iread(
{
xfs_buf_t *bp;
xfs_dinode_t *dip;
+ xfs_failaddr_t fa;
int error;
/*
@@ -507,11 +579,10 @@ xfs_iread(
return error;
/* even unallocated inodes are verified */
- if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
- xfs_alert(mp, "%s: validation failed for inode %lld",
- __func__, ip->i_ino);
-
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+ fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
+ sizeof(*dip), fa);
error = -EFSCORRUPTED;
goto out_brelse;
}
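/*
 * For reference, the reporting helper used above is added elsewhere in
 * this series (fs/xfs/xfs_error.c); its declaration, approximately:
 */
void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
		const char *name, void *buf, size_t bufsz,
		xfs_failaddr_t failaddr);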
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index a9c97a356c30..8a5e1da52d74 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -82,7 +82,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
-bool xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
- struct xfs_dinode *dip);
+xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+ struct xfs_dinode *dip);
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index c79a1616b79d..866d2861c625 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -35,6 +35,8 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_shared.h"
kmem_zone_t *xfs_ifork_zone;
@@ -62,69 +64,11 @@ xfs_iformat_fork(
int error = 0;
xfs_fsize_t di_size;
- if (unlikely(be32_to_cpu(dip->di_nextents) +
- be16_to_cpu(dip->di_anextents) >
- be64_to_cpu(dip->di_nblocks))) {
- xfs_warn(ip->i_mount,
- "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
- (unsigned long long)ip->i_ino,
- (int)(be32_to_cpu(dip->di_nextents) +
- be16_to_cpu(dip->di_anextents)),
- (unsigned long long)
- be64_to_cpu(dip->di_nblocks));
- XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
- if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
- xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
- (unsigned long long)ip->i_ino,
- dip->di_forkoff);
- XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
- if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
- !ip->i_mount->m_rtdev_targp)) {
- xfs_warn(ip->i_mount,
- "corrupt dinode %Lu, has realtime flag set.",
- ip->i_ino);
- XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
- XFS_ERRLEVEL_LOW, ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
- if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
- xfs_warn(ip->i_mount,
- "corrupt dinode %llu, wrong file type for reflink.",
- ip->i_ino);
- XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
- XFS_ERRLEVEL_LOW, ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
- if (unlikely(xfs_is_reflink_inode(ip) &&
- (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
- xfs_warn(ip->i_mount,
- "corrupt dinode %llu, has reflink+realtime flag set.",
- ip->i_ino);
- XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
- XFS_ERRLEVEL_LOW, ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
switch (inode->i_mode & S_IFMT) {
case S_IFIFO:
case S_IFCHR:
case S_IFBLK:
case S_IFSOCK:
- if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
- XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
ip->i_d.di_size = 0;
inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
break;
@@ -134,32 +78,7 @@ xfs_iformat_fork(
case S_IFDIR:
switch (dip->di_format) {
case XFS_DINODE_FMT_LOCAL:
- /*
- * no local regular files yet
- */
- if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
- xfs_warn(ip->i_mount,
- "corrupt inode %Lu (local format for regular file).",
- (unsigned long long) ip->i_ino);
- XFS_CORRUPTION_ERROR("xfs_iformat(4)",
- XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
di_size = be64_to_cpu(dip->di_size);
- if (unlikely(di_size < 0 ||
- di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
- xfs_warn(ip->i_mount,
- "corrupt inode %Lu (bad size %Ld for local inode).",
- (unsigned long long) ip->i_ino,
- (long long) di_size);
- XFS_CORRUPTION_ERROR("xfs_iformat(5)",
- XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- return -EFSCORRUPTED;
- }
-
size = (int)di_size;
error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
break;
@@ -170,28 +89,16 @@ xfs_iformat_fork(
error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
break;
default:
- XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
- ip->i_mount);
return -EFSCORRUPTED;
}
break;
default:
- XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
return -EFSCORRUPTED;
}
if (error)
return error;
- /* Check inline dir contents. */
- if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
- error = xfs_dir2_sf_verify(ip);
- if (error) {
- xfs_idestroy_fork(ip, XFS_DATA_FORK);
- return error;
- }
- }
-
if (xfs_is_reflink_inode(ip)) {
ASSERT(ip->i_cowfp == NULL);
xfs_ifork_init_cow(ip);
@@ -208,18 +115,6 @@ xfs_iformat_fork(
atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
size = be16_to_cpu(atp->hdr.totsize);
- if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
- xfs_warn(ip->i_mount,
- "corrupt inode %Lu (bad attr fork size %Ld).",
- (unsigned long long) ip->i_ino,
- (long long) size);
- XFS_CORRUPTION_ERROR("xfs_iformat(8)",
- XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
- error = -EFSCORRUPTED;
- break;
- }
-
error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
break;
case XFS_DINODE_FMT_EXTENTS:
@@ -403,6 +298,7 @@ xfs_iformat_btree(
*/
if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
XFS_IFORK_MAXEXT(ip, whichfork) ||
+ nrecs == 0 ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
@@ -827,3 +723,45 @@ xfs_ifork_init_cow(
ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
ip->i_cnextents = 0;
}
+
+/* Default fork content verifiers. */
+struct xfs_ifork_ops xfs_default_ifork_ops = {
+ .verify_attr = xfs_attr_shortform_verify,
+ .verify_dir = xfs_dir2_sf_verify,
+ .verify_symlink = xfs_symlink_shortform_verify,
+};
+
+/* Verify the inline contents of the data fork of an inode. */
+xfs_failaddr_t
+xfs_ifork_verify_data(
+ struct xfs_inode *ip,
+ struct xfs_ifork_ops *ops)
+{
+ /* Non-local data fork, we're done. */
+ if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+ return NULL;
+
+ /* Check the inline data fork if there is one. */
+ switch (VFS_I(ip)->i_mode & S_IFMT) {
+ case S_IFDIR:
+ return ops->verify_dir(ip);
+ case S_IFLNK:
+ return ops->verify_symlink(ip);
+ default:
+ return NULL;
+ }
+}
+
+/* Verify the inline contents of the attr fork of an inode. */
+xfs_failaddr_t
+xfs_ifork_verify_attr(
+ struct xfs_inode *ip,
+ struct xfs_ifork_ops *ops)
+{
+ /* There has to be an attr fork allocated if aformat is local. */
+ if (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
+ return NULL;
+ if (!XFS_IFORK_PTR(ip, XFS_ATTR_FORK))
+ return __this_address;
+ return ops->verify_attr(ip);
+}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index b9f0098e33b8..dd8aba0dd119 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -186,4 +186,18 @@ extern struct kmem_zone *xfs_ifork_zone;
extern void xfs_ifork_init_cow(struct xfs_inode *ip);
+typedef xfs_failaddr_t (*xfs_ifork_verifier_t)(struct xfs_inode *);
+
+struct xfs_ifork_ops {
+ xfs_ifork_verifier_t verify_symlink;
+ xfs_ifork_verifier_t verify_dir;
+ xfs_ifork_verifier_t verify_attr;
+};
+extern struct xfs_ifork_ops xfs_default_ifork_ops;
+
+xfs_failaddr_t xfs_ifork_verify_data(struct xfs_inode *ip,
+ struct xfs_ifork_ops *ops);
+xfs_failaddr_t xfs_ifork_verify_attr(struct xfs_inode *ip,
+ struct xfs_ifork_ops *ops);
+
#endif /* __XFS_INODE_FORK_H__ */
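
The ops table exists so callers can swap in alternative shortform checkers; ordinary readers pass xfs_default_ifork_ops. A sketch of how a regular caller might gate on the inline-fork checks once both forks are loaded (error handling reduced to the bare minimum):

	if (xfs_ifork_verify_data(ip, &xfs_default_ifork_ops) != NULL ||
	    xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops) != NULL)
		return -EFSCORRUPTED;	/* inline fork contents are bad */

Both helpers return NULL when the fork is non-local or passes its shortform verifier, so the calls are cheap in the common case.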
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index c10597973333..cc4cbe290939 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -55,7 +55,7 @@ xfs_log_calc_max_attrsetm_res(
* the maximum one in terms of the pre-calculated values which were done
* at mount time.
*/
-STATIC void
+void
xfs_log_get_max_trans_res(
struct xfs_mount *mp,
struct xfs_trans_res *max_resp)
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index d69c772271cb..bb1b13a9b5f4 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -112,8 +112,6 @@ typedef uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
-#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */
@@ -153,8 +151,11 @@ typedef uint16_t xfs_qwarncnt_t;
(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
-extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, const char *str);
+extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp,
+ struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type,
+ uint flags);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
+extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq,
+ xfs_dqid_t id, uint type);
#endif /* __XFS_QUOTA_H__ */
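
The old xfs_dqcheck() mixed verification, warning, and repair behind XFS_QMOPT_DOWARN/XFS_QMOPT_DQREPAIR; those flags go away and callers now make the repair decision explicitly. A sketch of the new calling convention (the surrounding loop over dquots and the ddq/id/type variables are assumed):

	xfs_failaddr_t	fa;
	int		error;

	fa = xfs_dquot_verify(mp, ddq, id, type, 0);
	if (fa) {
		/* verification failed; repairing is now an explicit choice */
		error = xfs_dquot_repair(mp, ddq, id, type);
		if (error)
			return error;
	}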
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..bee68c23d612 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
xfs_extlen_t aglen,
struct xfs_defer_ops *dfops)
{
- int error;
-
trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
agbno, aglen);
/* Add refcount btree reservation */
- error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+ return xfs_refcount_adjust_cow(rcur, agbno, aglen,
XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
- if (error)
- return error;
-
- /* Add rmap entry */
- if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
- error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
- rcur->bc_private.a.agno,
- agbno, aglen, XFS_RMAP_OWN_COW);
- if (error)
- return error;
- }
-
- return error;
}
/*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
xfs_extlen_t aglen,
struct xfs_defer_ops *dfops)
{
- int error;
-
trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
agbno, aglen);
/* Remove refcount btree reservation */
- error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+ return xfs_refcount_adjust_cow(rcur, agbno, aglen,
XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
- if (error)
- return error;
-
- /* Remove rmap entry */
- if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
- error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
- rcur->bc_private.a.agno,
- agbno, aglen, XFS_RMAP_OWN_COW);
- if (error)
- return error;
- }
-
- return error;
}
/* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
xfs_fsblock_t fsb,
xfs_extlen_t len)
{
+ int error;
+
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return 0;
- return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+ error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
fsb, len);
+ if (error)
+ return error;
+
+ /* Add rmap entry */
+ return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+ XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}
/* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
xfs_fsblock_t fsb,
xfs_extlen_t len)
{
+ int error;
+
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return 0;
+ /* Remove rmap entry */
+ error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+ XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+ if (error)
+ return error;
+
return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
fsb, len);
}
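
The net effect of these two hunks is that the rmap update is queued at the same time the refcount intent is deferred, rather than deep inside intent processing, and the alloc and free paths queue the pair in mirror-image order. The external calling convention is unchanged; a CoW staging cycle still looks like:

	/* stage: defers the refcount increase and the OWN_COW rmap add */
	error = xfs_refcount_alloc_cow_extent(mp, dfops, fsb, len);
	if (error)
		return error;

	/* unstage: queues the OWN_COW rmap removal, then the refcount drop */
	error = xfs_refcount_free_cow_extent(mp, dfops, fsb, len);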
@@ -1710,3 +1696,22 @@ out_cursor:
xfs_trans_brelse(tp, agbp);
goto out_trans;
}
+
+/* Is there a record covering a given extent? */
+int
+xfs_refcount_has_record(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+
+ memset(&low, 0, sizeof(low));
+ low.rc.rc_startblock = bno;
+ memset(&high, 0xFF, sizeof(high));
+ high.rc.rc_startblock = bno + len - 1;
+
+ return xfs_btree_has_record(cur, &low, &high, exists);
+}
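
xfs_refcount_has_record() shows the keyspace-query idiom used by these new _has_record helpers: zero the low key, saturate the high key with 0xFF bytes, then pin both startblocks so the query covers [bno, bno + len) regardless of the other key fields. From a caller's point of view it is a simple existence test; what to do on a hit is the caller's business, and the scrub xref helper added later in this diff is one example:

	bool	exists;
	int	error;

	/* Does any refcount record overlap [agbno, agbno + len)? */
	error = xfs_refcount_has_record(cur, agbno, len, &exists);
	if (error)
		return error;
	if (exists)
		xfs_scrub_btree_xref_set_corrupt(sc, cur, 0);	/* e.g. */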
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index eafb9d1f3b37..2a731ac68fe4 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -83,4 +83,7 @@ static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
}
+extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
+ xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 3c59dd3d58d7..8479769e470d 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -223,29 +223,31 @@ xfs_refcountbt_diff_two_keys(
be32_to_cpu(k2->refc.rc_startblock);
}
-STATIC bool
+STATIC xfs_failaddr_t
xfs_refcountbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
+ xfs_failaddr_t fa;
unsigned int level;
if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC))
- return false;
+ return __this_address;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
- return false;
- if (!xfs_btree_sblock_v5hdr_verify(bp))
- return false;
+ return __this_address;
+ fa = xfs_btree_sblock_v5hdr_verify(bp);
+ if (fa)
+ return fa;
level = be16_to_cpu(block->bb_level);
if (pag && pag->pagf_init) {
if (level >= pag->pagf_refcount_level)
- return false;
+ return __this_address;
} else if (level >= mp->m_refc_maxlevels)
- return false;
+ return __this_address;
return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
}
@@ -254,25 +256,30 @@ STATIC void
xfs_refcountbt_read_verify(
struct xfs_buf *bp)
{
+ xfs_failaddr_t fa;
+
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_refcountbt_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_refcountbt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
- if (bp->b_error) {
+ if (bp->b_error)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_verifier_error(bp);
- }
}
STATIC void
xfs_refcountbt_write_verify(
struct xfs_buf *bp)
{
- if (!xfs_refcountbt_verify(bp)) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_refcountbt_verify(bp);
+ if (fa) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
xfs_btree_sblock_calc_crc(bp);
@@ -283,6 +290,7 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
.name = "xfs_refcountbt",
.verify_read = xfs_refcountbt_read_verify,
.verify_write = xfs_refcountbt_write_verify,
+ .verify_struct = xfs_refcountbt_verify,
};
STATIC int
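
The new .verify_struct hook exports the pure structural check, with CRC verification and b_error side effects excluded, which is what lets scrub re-validate a buffer that is already in memory (see the xfs_scrub_buffer_recheck() calls added later in this diff). A sketch of the kind of recheck the hook enables; the real helper's body is not shown here, so treat this as an illustration:

	xfs_failaddr_t	fa;

	if (bp->b_ops && bp->b_ops->verify_struct) {
		fa = bp->b_ops->verify_struct(bp);	/* no I/O, no b_error */
		if (fa)
			xfs_scrub_block_set_corrupt(sc, bp);
	}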
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..79822cf6ebe3 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
}
/*
+ * Perform all the relevant owner checks for a removal op. If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+ struct xfs_mount *mp,
+ uint64_t ltoff,
+ struct xfs_rmap_irec *rec,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ uint64_t owner,
+ uint64_t offset,
+ unsigned int flags)
+{
+ int error = 0;
+
+ if (owner == XFS_RMAP_OWN_UNKNOWN)
+ return 0;
+
+ /* Make sure the unwritten flag matches. */
+ XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+ (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+ /* Make sure the owner matches what we expect to find in the tree. */
+ XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+ /* Check the offset, if necessary. */
+ if (XFS_RMAP_NON_INODE_OWNER(owner))
+ goto out;
+
+ if (flags & XFS_RMAP_BMBT_BLOCK) {
+ XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+ out);
+ } else {
+ XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+ XFS_WANT_CORRUPTED_GOTO(mp,
+ ltoff + rec->rm_blockcount >= offset + len,
+ out);
+ }
+
+out:
+ return error;
+}
+
+/*
* Find the extent in the rmap btree and remove it.
*
* The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
goto out_done;
}
- /* Make sure the unwritten flag matches. */
- XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
- (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+ /*
+ * If we're doing an unknown-owner removal for EFI recovery, we expect
+ * to find the full range in the rmapbt or nothing at all. If we
+ * don't find any rmaps overlapping either end of the range, we're
+ * done. Hopefully this means that the EFI creator already queued
+ * (and finished) a RUI to remove the rmap.
+ */
+ if (owner == XFS_RMAP_OWN_UNKNOWN &&
+ ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+ struct xfs_rmap_irec rtrec;
+
+ error = xfs_btree_increment(cur, 0, &i);
+ if (error)
+ goto out_error;
+ if (i == 0)
+ goto out_done;
+ error = xfs_rmap_get_rec(cur, &rtrec, &i);
+ if (error)
+ goto out_error;
+ XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (rtrec.rm_startblock >= bno + len)
+ goto out_done;
+ }
/* Make sure the extent we found covers the entire freeing range. */
XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
- ltrec.rm_startblock + ltrec.rm_blockcount >=
- bno + len, out_error);
-
- /* Make sure the owner matches what we expect to find in the tree. */
- XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
- XFS_RMAP_NON_INODE_OWNER(owner), out_error);
+ ltrec.rm_startblock + ltrec.rm_blockcount >=
+ bno + len, out_error);
- /* Check the offset, if necessary. */
- if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
- if (flags & XFS_RMAP_BMBT_BLOCK) {
- XFS_WANT_CORRUPTED_GOTO(mp,
- ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
- out_error);
- } else {
- XFS_WANT_CORRUPTED_GOTO(mp,
- ltrec.rm_offset <= offset, out_error);
- XFS_WANT_CORRUPTED_GOTO(mp,
- ltoff + ltrec.rm_blockcount >= offset + len,
- out_error);
- }
- }
+ /* Check owner information. */
+ error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+ offset, flags);
+ if (error)
+ goto out_error;
if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
/* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
flags |= XFS_RMAP_UNWRITTEN;
trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
unwritten, oinfo);
+ ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
/*
* For the initial lookup, look for an exact match or the left-adjacent
@@ -2334,3 +2387,70 @@ xfs_rmap_compare(
else
return 0;
}
+
+/* Is there a record covering a given extent? */
+int
+xfs_rmap_has_record(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ bool *exists)
+{
+ union xfs_btree_irec low;
+ union xfs_btree_irec high;
+
+ memset(&low, 0, sizeof(low));
+ low.r.rm_startblock = bno;
+ memset(&high, 0xFF, sizeof(high));
+ high.r.rm_startblock = bno + len - 1;
+
+ return xfs_btree_has_record(cur, &low, &high, exists);
+}
+
+/*
+ * Is there a record for this owner completely covering a given physical
+ * extent? If so, *has_rmap will be set to true. If there is no record
+ * or the record only covers part of the range, we set *has_rmap to false.
+ * This function doesn't perform range lookups or offset checks, so it is
+ * not suitable for checking data fork blocks.
+ */
+int
+xfs_rmap_record_exists(
+ struct xfs_btree_cur *cur,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo,
+ bool *has_rmap)
+{
+ uint64_t owner;
+ uint64_t offset;
+ unsigned int flags;
+ int has_record;
+ struct xfs_rmap_irec irec;
+ int error;
+
+ xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
+ ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
+ (flags & XFS_RMAP_BMBT_BLOCK));
+
+ error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
+ &has_record);
+ if (error)
+ return error;
+ if (!has_record) {
+ *has_rmap = false;
+ return 0;
+ }
+
+ error = xfs_rmap_get_rec(cur, &irec, &has_record);
+ if (error)
+ return error;
+ if (!has_record) {
+ *has_rmap = false;
+ return 0;
+ }
+
+ *has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
+ irec.rm_startblock + irec.rm_blockcount >= bno + len);
+ return 0;
+}
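
The ASSERT above restricts xfs_rmap_record_exists() to non-inode owners (or bmbt blocks) because a single lookup_le cannot resolve per-offset inode data mappings; xfs_rmap_lookup_le_range() handles those. A sketch of checking that an AG metadata extent is fully owned as expected:

	struct xfs_owner_info	oinfo;
	bool			has_rmap;
	int			error;

	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
	error = xfs_rmap_record_exists(cur, agbno, len, &oinfo, &has_rmap);
	if (error)
		return error;
	if (!has_rmap)
		xfs_scrub_btree_xref_set_corrupt(sc, cur, 0);	/* e.g. */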
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..380e53be98d5 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
xfs_rmap_skip_owner_update(
struct xfs_owner_info *oi)
{
- oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+ xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+ struct xfs_owner_info *oi)
+{
+ return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+ struct xfs_owner_info *oi)
+{
+ xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
}
/* Reverse mapping functions. */
@@ -219,5 +233,10 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
union xfs_btree_rec;
int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
+int xfs_rmap_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, bool *exists);
+int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+ xfs_extlen_t len, struct xfs_owner_info *oinfo,
+ bool *has_rmap);
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9d9c9192584c..e829c3e489ea 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -303,13 +303,14 @@ xfs_rmapbt_diff_two_keys(
return 0;
}
-static bool
+static xfs_failaddr_t
xfs_rmapbt_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
+ xfs_failaddr_t fa;
unsigned int level;
/*
@@ -325,19 +326,20 @@ xfs_rmapbt_verify(
* in this case.
*/
if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
- return false;
+ return __this_address;
if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return false;
- if (!xfs_btree_sblock_v5hdr_verify(bp))
- return false;
+ return __this_address;
+ fa = xfs_btree_sblock_v5hdr_verify(bp);
+ if (fa)
+ return fa;
level = be16_to_cpu(block->bb_level);
if (pag && pag->pagf_init) {
if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
- return false;
+ return __this_address;
} else if (level >= mp->m_rmap_maxlevels)
- return false;
+ return __this_address;
return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}
@@ -346,25 +348,30 @@ static void
xfs_rmapbt_read_verify(
struct xfs_buf *bp)
{
+ xfs_failaddr_t fa;
+
if (!xfs_btree_sblock_verify_crc(bp))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_rmapbt_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_rmapbt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
- if (bp->b_error) {
+ if (bp->b_error)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_verifier_error(bp);
- }
}
static void
xfs_rmapbt_write_verify(
struct xfs_buf *bp)
{
- if (!xfs_rmapbt_verify(bp)) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_rmapbt_verify(bp);
+ if (fa) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
xfs_btree_sblock_calc_crc(bp);
@@ -375,6 +382,7 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
.name = "xfs_rmapbt",
.verify_read = xfs_rmapbt_read_verify,
.verify_write = xfs_rmapbt_write_verify,
+ .verify_struct = xfs_rmapbt_verify,
};
STATIC int
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 3fb29a5ea915..106be2d0bb88 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1097,3 +1097,24 @@ xfs_verify_rtbno(
{
return rtbno < mp->m_sb.sb_rblocks;
}
+
+/* Is the given extent all free? */
+int
+xfs_rtalloc_extent_is_free(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_rtblock_t start,
+ xfs_extlen_t len,
+ bool *is_free)
+{
+ xfs_rtblock_t end;
+ int matches;
+ int error;
+
+ error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches);
+ if (error)
+ return error;
+
+ *is_free = matches;
+ return 0;
+}
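
This is a thin wrapper around xfs_rtcheck_range() asking whether the whole range matches the free state; matches comes back nonzero only if every block in [start, start + len) is free. Expected usage from a checker (sketch):

	bool	is_free;
	int	error;

	error = xfs_rtalloc_extent_is_free(mp, tp, rtbno, len, &is_free);
	if (error)
		return error;
	if (!is_free)
		return -EFSCORRUPTED;	/* some block in the range is in use */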
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 9b5aae2bcc0b..46af6aa60a8e 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -40,6 +40,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -116,6 +118,9 @@ xfs_mount_validate_sb(
bool check_inprogress,
bool check_version)
{
+ u32 agcount = 0;
+ u32 rem;
+
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
xfs_warn(mp, "bad magic number");
return -EWRONGFS;
@@ -226,6 +231,13 @@ xfs_mount_validate_sb(
return -EINVAL;
}
+ /* Compute agcount for this number of dblocks and agblocks */
+ if (sbp->sb_agblocks) {
+ agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem);
+ if (rem)
+ agcount++;
+ }
+
/*
* More sanity checking. Most of these were stolen directly from
* xfs_repair.
@@ -250,6 +262,10 @@ xfs_mount_validate_sb(
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE ||
sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
+ XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES ||
+ XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES ||
+ sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 ||
+ agcount == 0 || agcount != sbp->sb_agcount ||
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
@@ -640,11 +656,10 @@ xfs_sb_read_verify(
error = xfs_sb_verify(bp, true);
out_error:
- if (error) {
+ if (error == -EFSCORRUPTED || error == -EFSBADCRC)
+ xfs_verifier_error(bp, error, __this_address);
+ else if (error)
xfs_buf_ioerror(bp, error);
- if (error == -EFSCORRUPTED || error == -EFSBADCRC)
- xfs_verifier_error(bp);
- }
}
/*
@@ -673,13 +688,12 @@ xfs_sb_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
int error;
error = xfs_sb_verify(bp, false);
if (error) {
- xfs_buf_ioerror(bp, error);
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, error, __this_address);
return;
}
@@ -876,3 +890,88 @@ xfs_sync_sb(
xfs_trans_set_sync(tp);
return xfs_trans_commit(tp);
}
+
+int
+xfs_fs_geometry(
+ struct xfs_sb *sbp,
+ struct xfs_fsop_geom *geo,
+ int struct_version)
+{
+ memset(geo, 0, sizeof(struct xfs_fsop_geom));
+
+ geo->blocksize = sbp->sb_blocksize;
+ geo->rtextsize = sbp->sb_rextsize;
+ geo->agblocks = sbp->sb_agblocks;
+ geo->agcount = sbp->sb_agcount;
+ geo->logblocks = sbp->sb_logblocks;
+ geo->sectsize = sbp->sb_sectsize;
+ geo->inodesize = sbp->sb_inodesize;
+ geo->imaxpct = sbp->sb_imax_pct;
+ geo->datablocks = sbp->sb_dblocks;
+ geo->rtblocks = sbp->sb_rblocks;
+ geo->rtextents = sbp->sb_rextents;
+ geo->logstart = sbp->sb_logstart;
+ BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid));
+ memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid));
+
+ if (struct_version < 2)
+ return 0;
+
+ geo->sunit = sbp->sb_unit;
+ geo->swidth = sbp->sb_width;
+
+ if (struct_version < 3)
+ return 0;
+
+ geo->version = XFS_FSOP_GEOM_VERSION;
+ geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
+ XFS_FSOP_GEOM_FLAGS_DIRV2;
+ if (xfs_sb_version_hasattr(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
+ if (xfs_sb_version_hasquota(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA;
+ if (xfs_sb_version_hasalign(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
+ if (xfs_sb_version_hasdalign(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
+ if (xfs_sb_version_hasextflgbit(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
+ if (xfs_sb_version_hassector(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
+ if (xfs_sb_version_hasasciici(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI;
+ if (xfs_sb_version_haslazysbcount(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB;
+ if (xfs_sb_version_hasattr2(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2;
+ if (xfs_sb_version_hasprojid32bit(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32;
+ if (xfs_sb_version_hascrc(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB;
+ if (xfs_sb_version_hasftype(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE;
+ if (xfs_sb_version_hasfinobt(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT;
+ if (xfs_sb_version_hassparseinodes(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES;
+ if (xfs_sb_version_hasrmapbt(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
+ if (xfs_sb_version_hasreflink(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
+ if (xfs_sb_version_hassector(sbp))
+ geo->logsectsize = sbp->sb_logsectsize;
+ else
+ geo->logsectsize = BBSIZE;
+ geo->rtsectsize = sbp->sb_blocksize;
+ geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
+
+ if (struct_version < 4)
+ return 0;
+
+ if (xfs_sb_version_haslogv2(sbp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2;
+
+ geo->logsunit = sbp->sb_logsunit;
+
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 961e6475a309..63dcd2a1a657 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -34,4 +34,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
+#define XFS_FS_GEOM_MAX_STRUCT_VER (4)
+extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo,
+ int struct_version);
+
#endif /* __XFS_SB_H__ */
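
With the geometry assembly moved into libxfs, the kernel and userspace can share one implementation; struct_version controls how much of the output structure gets filled, up to XFS_FS_GEOM_MAX_STRUCT_VER. An ioctl-style caller reduces to the following sketch (the copy back to userspace is elided):

	struct xfs_fsop_geom	geo;
	int			error;

	error = xfs_fs_geometry(&mp->m_sb, &geo, XFS_FS_GEOM_MAX_STRUCT_VER);
	if (error)
		return error;
	/* hand geo back to the caller */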
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index c6f4eb46fe26..d0b84da0cb1e 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -76,6 +76,9 @@ struct xfs_log_item_desc {
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
+struct xfs_trans_res;
+void xfs_log_get_max_trans_res(struct xfs_mount *mp,
+ struct xfs_trans_res *max_resp);
/*
* Values for t_flags.
@@ -143,5 +146,6 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
uint32_t size, struct xfs_buf *bp);
void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
+xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index c484877129a0..5ef5f354587e 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -98,7 +98,7 @@ xfs_symlink_hdr_ok(
return true;
}
-static bool
+static xfs_failaddr_t
xfs_symlink_verify(
struct xfs_buf *bp)
{
@@ -106,22 +106,22 @@ xfs_symlink_verify(
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
+ return __this_address;
if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
- return false;
+ return __this_address;
if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
+ return __this_address;
if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
- return false;
+ return __this_address;
if (be32_to_cpu(dsl->sl_offset) +
be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
- return false;
+ return __this_address;
if (dsl->sl_owner == 0)
- return false;
+ return __this_address;
if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
- return false;
+ return __this_address;
- return true;
+ return NULL;
}
static void
@@ -129,18 +129,19 @@ xfs_symlink_read_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
- xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_symlink_verify(bp))
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
-
- if (bp->b_error)
- xfs_verifier_error(bp);
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_symlink_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
}
static void
@@ -148,15 +149,16 @@ xfs_symlink_write_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
/* no verification of non-crc buffers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_symlink_verify(bp)) {
- xfs_buf_ioerror(bp, -EFSCORRUPTED);
- xfs_verifier_error(bp);
+ fa = xfs_symlink_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
@@ -171,6 +173,7 @@ const struct xfs_buf_ops xfs_symlink_buf_ops = {
.name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
+ .verify_struct = xfs_symlink_verify,
};
void
@@ -207,3 +210,37 @@ xfs_symlink_local_to_remote(
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
ifp->if_bytes - 1);
}
+
+/* Verify the consistency of an inline symlink. */
+xfs_failaddr_t
+xfs_symlink_shortform_verify(
+ struct xfs_inode *ip)
+{
+ char *sfp;
+ char *endp;
+ struct xfs_ifork *ifp;
+ int size;
+
+ ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ sfp = (char *)ifp->if_u1.if_data;
+ size = ifp->if_bytes;
+	endp = sfp + size - 1;
+
+ /* Zero length symlinks can exist while we're deleting a remote one. */
+ if (size == 0)
+ return NULL;
+
+ /* No negative sizes or overly long symlink targets. */
+ if (size < 0 || size > XFS_SYMLINK_MAXLEN)
+ return __this_address;
+
+ /* No NULLs in the target either. */
+ if (memchr(sfp, 0, size - 1))
+ return __this_address;
+
+ /* We /did/ null-terminate the buffer, right? */
+ if (*endp != 0)
+ return __this_address;
+ return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6bd916bd35e2..5f17641f040f 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -34,6 +34,9 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
+#define _ALLOC true
+#define _FREE false
+
/*
* A buffer has a format structure overhead in the log in addition
* to the data, so we need to take this into account when reserving
@@ -132,43 +135,77 @@ xfs_calc_inode_res(
}
/*
- * The free inode btree is a conditional feature and the log reservation
- * requirements differ slightly from that of the traditional inode allocation
- * btree. The finobt tracks records for inode chunks with at least one free
- * inode. A record can be removed from the tree for an inode allocation
- * or free and thus the finobt reservation is unconditional across:
+ * Inode btree record insertion/removal modifies the inode btree and free space
+ * btrees (since the inobt does not use the agfl). This requires the following
+ * reservation:
*
- * - inode allocation
- * - inode free
- * - inode chunk allocation
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
*
- * The 'modify' param indicates to include the record modification scenario. The
- * 'alloc' param indicates to include the reservation for free space btree
- * modifications on behalf of finobt modifications. This is required only for
- * transactions that do not already account for free space btree modifications.
+ * The caller must account for SB and AG header modifications, etc.
+ */
+STATIC uint
+xfs_calc_inobt_res(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * The free inode btree is a conditional feature. The behavior differs slightly
+ * from that of the traditional inode btree in that the finobt tracks records
+ * for inode chunks with at least one free inode. A record can be removed from
+ * the tree during individual inode allocation. Therefore the finobt
+ * reservation is unconditional for both the inode chunk allocation and
+ * individual inode allocation (modify) cases.
*
- * the free inode btree: max depth * block size
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the free inode btree entry: block size
+ * Behavior aside, the reservation for finobt modification is equivalent to the
+ * traditional inobt: cover a full finobt shape change plus block allocation.
*/
STATIC uint
xfs_calc_finobt_res(
- struct xfs_mount *mp,
- int alloc,
- int modify)
+ struct xfs_mount *mp)
{
- uint res;
-
if (!xfs_sb_version_hasfinobt(&mp->m_sb))
return 0;
- res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
- if (alloc)
- res += xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
- if (modify)
- res += (uint)XFS_FSB_TO_B(mp, 1);
+ return xfs_calc_inobt_res(mp);
+}
+/*
+ * Calculate the reservation required to allocate or free an inode chunk. This
+ * includes:
+ *
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the inode chunk: m_ialloc_blks * N
+ *
+ * The size N of the inode chunk reservation depends on whether it is for
+ * allocation or free and which type of create transaction is in use. An inode
+ * chunk free always invalidates the buffers and only requires reservation for
+ * headers (N == 0). An inode chunk allocation requires a chunk sized
+ * reservation on v4 and older superblocks to initialize the chunk. No chunk
+ * reservation is required for allocation on v5 supers, which use ordered
+ * buffers to initialize.
+ */
+STATIC uint
+xfs_calc_inode_chunk_res(
+ struct xfs_mount *mp,
+ bool alloc)
+{
+ uint res, size = 0;
+
+ res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+ if (alloc) {
+ /* icreate tx uses ordered buffers */
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return res;
+ size = XFS_FSB_TO_B(mp, 1);
+ }
+
+ res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
return res;
}
@@ -232,8 +269,6 @@ xfs_calc_write_reservation(
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
@@ -245,12 +280,7 @@ xfs_calc_itruncate_reservation(
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(5, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0)));
+ XFS_FSB_TO_B(mp, 1))));
}
/*
@@ -282,13 +312,14 @@ xfs_calc_rename_reservation(
* For removing an inode from unlinked list at first, we can modify:
* the agi hash list and counters: sector size
* the on disk inode before ours in the agi hash list: inode cluster size
+ * the on disk inode in the agi hash list: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -320,13 +351,13 @@ xfs_calc_link_reservation(
/*
* For adding an inode to unlinked list we can modify:
* the agi hash list: sector size
- * the unlinked inode: inode size
+ * the on disk inode: inode cluster size
*/
STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_inode_res(mp, 1);
+ max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
}
/*
@@ -379,45 +410,16 @@ xfs_calc_create_resv_modify(
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
(uint)XFS_FSB_TO_B(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 1, 1);
-}
-
-/*
- * For create we can allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: mp->m_ialloc_blks * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
- struct xfs_mount *mp)
-{
- return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
- mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- return XFS_DQUOT_LOGRES(mp) +
- MAX(xfs_calc_create_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ xfs_calc_finobt_res(mp);
}
/*
* For icreate we can allocate some inodes giving:
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
* the superblock for the nlink flag: sector size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- * the finobt (record insertion)
+ * the inode chunk (allocation, optional init)
+ * the inobt (record insertion)
+ * the finobt (optional, record insertion)
*/
STATIC uint
xfs_calc_icreate_resv_alloc(
@@ -425,10 +427,9 @@ xfs_calc_icreate_resv_alloc(
{
return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
mp->m_sb.sb_sectsize +
- xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 0);
+ xfs_calc_inode_chunk_res(mp, _ALLOC) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
STATIC uint
@@ -440,26 +441,12 @@ xfs_calc_icreate_reservation(xfs_mount_t *mp)
}
STATIC uint
-xfs_calc_create_reservation(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return xfs_calc_icreate_reservation(mp);
- return __xfs_calc_create_reservation(mp);
-
-}
-
-STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
uint res = XFS_DQUOT_LOGRES(mp);
- if (xfs_sb_version_hascrc(&mp->m_sb))
- res += xfs_calc_icreate_resv_alloc(mp);
- else
- res += xfs_calc_create_resv_alloc(mp);
-
+ res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
}
@@ -470,7 +457,7 @@ STATIC uint
xfs_calc_mkdir_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp);
+ return xfs_calc_icreate_reservation(mp);
}
@@ -483,20 +470,24 @@ STATIC uint
xfs_calc_symlink_reservation(
struct xfs_mount *mp)
{
- return xfs_calc_create_reservation(mp) +
+ return xfs_calc_icreate_reservation(mp) +
xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
}
/*
* In freeing an inode we can modify:
* the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the super block free inode counter, AGF and AGFL: sector size
+ * the on disk inode (agi unlinked list removal)
+ * the inode chunk (invalidated, headers only)
+ * the inode btree
* the finobt (record insertion, removal or modification)
+ *
+ * Note that the inode chunk res. includes an allocfree res. for freeing of the
+ * inode chunk. This is technically extraneous because the inode chunk free is
+ * deferred (it occurs after a transaction roll). Include the extra reservation
+ * anyway since we've had reports of ifree transaction overruns due to too many
+ * agfl fixups during inode chunk frees.
*/
STATIC uint
xfs_calc_ifree_reservation(
@@ -504,15 +495,11 @@ xfs_calc_ifree_reservation(
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
- xfs_calc_buf_res(1, 0) +
- xfs_calc_buf_res(2 + mp->m_ialloc_blks +
- mp->m_in_maxlevels, 0) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_finobt_res(mp, 0, 1);
+ xfs_calc_inode_chunk_res(mp, _FREE) +
+ xfs_calc_inobt_res(mp) +
+ xfs_calc_finobt_res(mp);
}
/*
@@ -842,7 +829,7 @@ xfs_trans_resv_calc(
resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+ resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 2a9b4f9e93c6..fd975524f460 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -32,30 +32,17 @@
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
/*
- * Set up scrub to check all the static metadata in each AG.
- * This means the SB, AGF, AGI, and AGFL headers.
+ * Walk all the blocks in the AGFL. The fn callback can return any negative
+ * error code or XFS_BTREE_QUERY_RANGE_ABORT.
*/
int
-xfs_scrub_setup_ag_header(
- struct xfs_scrub_context *sc,
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = sc->mp;
-
- if (sc->sm->sm_agno >= mp->m_sb.sb_agcount ||
- sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
- return xfs_scrub_setup_fs(sc, ip);
-}
-
-/* Walk all the blocks in the AGFL. */
-int
xfs_scrub_walk_agfl(
struct xfs_scrub_context *sc,
int (*fn)(struct xfs_scrub_context *,
@@ -115,6 +102,36 @@ xfs_scrub_walk_agfl(
/* Superblock */
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_superblock_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agnumber_t agno = sc->sm->sm_agno;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_SB_BLOCK(mp);
+
+ error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
/*
* Scrub the filesystem superblock.
*
@@ -143,6 +160,22 @@ xfs_scrub_superblock(
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+ /*
+ * The superblock verifier can return several different error codes
+ * if it thinks the superblock doesn't look right. For a mount these
+ * would all get bounced back to userspace, but if we're here then the
+ * fs mounted successfully, which means that this secondary superblock
+ * is simply incorrect. Treat all these codes the same way we treat
+ * any corruption.
+ */
+ switch (error) {
+ case -EINVAL: /* also -EWRONGFS */
+ case -ENOSYS:
+ case -EFBIG:
+ error = -EFSCORRUPTED;
+ default:
+ break;
+ }
if (!xfs_scrub_process_error(sc, agno, XFS_SB_BLOCK(mp), &error))
return error;
@@ -387,11 +420,175 @@ xfs_scrub_superblock(
BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
xfs_scrub_block_set_corrupt(sc, bp);
+ xfs_scrub_superblock_xref(sc, bp);
+
return error;
}
/* AGF */
+/* Tally freespace record lengths. */
+STATIC int
+xfs_scrub_agf_record_bno_lengths(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ xfs_extlen_t *blocks = priv;
+
+ (*blocks) += rec->ar_blockcount;
+ return 0;
+}
+
+/* Check agf_freeblks */
+static inline void
+xfs_scrub_agf_xref_freeblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_extlen_t blocks = 0;
+ int error;
+
+ if (!sc->sa.bno_cur)
+ return;
+
+ error = xfs_alloc_query_all(sc->sa.bno_cur,
+ xfs_scrub_agf_record_bno_lengths, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ if (blocks != be32_to_cpu(agf->agf_freeblks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/* Cross reference the AGF with the cntbt (freespace by length btree) */
+static inline void
+xfs_scrub_agf_xref_cntbt(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_agblock_t agbno;
+ xfs_extlen_t blocks;
+ int have;
+ int error;
+
+ if (!sc->sa.cnt_cur)
+ return;
+
+ /* Any freespace at all? */
+ error = xfs_alloc_lookup_le(sc->sa.cnt_cur, 0, -1U, &have);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ if (!have) {
+		if (agf->agf_freeblks != cpu_to_be32(0))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+ return;
+ }
+
+ /* Check agf_longest */
+ error = xfs_alloc_get_rec(sc->sa.cnt_cur, &agbno, &blocks, &have);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ if (!have || blocks != be32_to_cpu(agf->agf_longest))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/* Check the btree block counts in the AGF against the btrees. */
+STATIC void
+xfs_scrub_agf_xref_btreeblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t blocks;
+ xfs_agblock_t btreeblks;
+ int error;
+
+ /* Check agf_rmap_blocks; set up for agf_btreeblks check */
+ if (sc->sa.rmap_cur) {
+ error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ btreeblks = blocks - 1;
+ if (blocks != be32_to_cpu(agf->agf_rmap_blocks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+ } else {
+ btreeblks = 0;
+ }
+
+ /*
+ * No rmap cursor; we can't xref if we have the rmapbt feature.
+ * We also can't do it if we're missing the free space btree cursors.
+ */
+ if ((xfs_sb_version_hasrmapbt(&mp->m_sb) && !sc->sa.rmap_cur) ||
+ !sc->sa.bno_cur || !sc->sa.cnt_cur)
+ return;
+
+ /* Check agf_btreeblks */
+ error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ btreeblks += blocks - 1;
+
+ error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.cnt_cur))
+ return;
+ btreeblks += blocks - 1;
+
+ if (btreeblks != be32_to_cpu(agf->agf_btreeblks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/* Check agf_refcount_blocks against tree size */
+static inline void
+xfs_scrub_agf_xref_refcblks(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
+ xfs_agblock_t blocks;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ error = xfs_btree_count_blocks(sc->sa.refc_cur, &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (blocks != be32_to_cpu(agf->agf_refcount_blocks))
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agf_bp);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_agf_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGF_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_agf_xref_freeblks(sc);
+ xfs_scrub_agf_xref_cntbt(sc);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_agf_xref_btreeblks(sc);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+ xfs_scrub_agf_xref_refcblks(sc);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
/* Scrub the AGF. */
int
xfs_scrub_agf(
@@ -414,6 +611,7 @@ xfs_scrub_agf(
&sc->sa.agf_bp, &sc->sa.agfl_bp);
if (!xfs_scrub_process_error(sc, agno, XFS_AGF_BLOCK(sc->mp), &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agf_bp);
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -470,6 +668,7 @@ xfs_scrub_agf(
if (agfl_count != 0 && fl_count != agfl_count)
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
+ xfs_scrub_agf_xref(sc);
out:
return error;
}
@@ -477,11 +676,28 @@ out:
/* AGFL */
struct xfs_scrub_agfl_info {
+ struct xfs_owner_info oinfo;
unsigned int sz_entries;
unsigned int nr_entries;
xfs_agblock_t *entries;
};
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_agfl_block_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ struct xfs_owner_info *oinfo)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+}
+
/* Scrub an AGFL block. */
STATIC int
xfs_scrub_agfl_block(
@@ -499,6 +715,8 @@ xfs_scrub_agfl_block(
else
xfs_scrub_block_set_corrupt(sc, sc->sa.agfl_bp);
+ xfs_scrub_agfl_block_xref(sc, agbno, priv);
+
return 0;
}
@@ -513,6 +731,37 @@ xfs_scrub_agblock_cmp(
return (int)*a - (int)*b;
}
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_agfl_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGFL_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /*
+ * Scrub teardown will take care of sc->sa for us. Leave sc->sa
+ * active so that the agfl block xref can use it too.
+ */
+}
+
/* Scrub the AGFL. */
int
xfs_scrub_agfl(
@@ -532,6 +781,12 @@ xfs_scrub_agfl(
goto out;
if (!sc->sa.agf_bp)
return -EFSCORRUPTED;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agfl_bp);
+
+ xfs_scrub_agfl_xref(sc);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ goto out;
/* Allocate buffer to ensure uniqueness of AGFL entries. */
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
@@ -548,6 +803,7 @@ xfs_scrub_agfl(
}
/* Check the blocks in the AGFL. */
+ xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
if (error)
goto out_free;
@@ -575,6 +831,56 @@ out:
/* AGI */
+/* Check agi_count/agi_freecount */
+static inline void
+xfs_scrub_agi_xref_icounts(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
+ xfs_agino_t icount;
+ xfs_agino_t freecount;
+ int error;
+
+ if (!sc->sa.ino_cur)
+ return;
+
+ error = xfs_ialloc_count_inodes(sc->sa.ino_cur, &icount, &freecount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.ino_cur))
+ return;
+ if (be32_to_cpu(agi->agi_count) != icount ||
+ be32_to_cpu(agi->agi_freecount) != freecount)
+ xfs_scrub_block_xref_set_corrupt(sc, sc->sa.agi_bp);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_agi_xref(
+ struct xfs_scrub_context *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = sc->mp;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agbno = XFS_AGI_BLOCK(mp);
+
+ error = xfs_scrub_ag_btcur_init(sc, &sc->sa);
+ if (error)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, 1);
+ xfs_scrub_agi_xref_icounts(sc);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_FS);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+
+ /* scrub teardown will take care of sc->sa for us */
+}
+
/* Scrub the AGI. */
int
xfs_scrub_agi(
@@ -598,6 +904,7 @@ xfs_scrub_agi(
&sc->sa.agf_bp, &sc->sa.agfl_bp);
if (!xfs_scrub_process_error(sc, agno, XFS_AGI_BLOCK(sc->mp), &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, sc->sa.agi_bp);
agi = XFS_BUF_TO_AGI(sc->sa.agi_bp);
@@ -653,6 +960,7 @@ xfs_scrub_agi(
if (agi->agi_pad32 != cpu_to_be32(0))
xfs_scrub_block_set_corrupt(sc, sc->sa.agi_bp);
+ xfs_scrub_agi_xref(sc);
out:
return error;
}
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 059663e13414..517c079d3f68 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -31,6 +31,7 @@
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -49,6 +50,64 @@ xfs_scrub_setup_ag_allocbt(
}
/* Free space btree scrubber. */
+/*
+ * Ensure there's a corresponding cntbt/bnobt record matching this
+ * bnobt/cntbt record, respectively.
+ */
+STATIC void
+xfs_scrub_allocbt_xref_other(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_btree_cur **pcur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int has_otherrec;
+ int error;
+
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
+ pcur = &sc->sa.cnt_cur;
+ else
+ pcur = &sc->sa.bno_cur;
+ if (!*pcur)
+ return;
+
+ error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if (!has_otherrec) {
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+ return;
+ }
+
+ error = xfs_alloc_get_rec(*pcur, &fbno, &flen, &has_otherrec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if (!has_otherrec) {
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+ return;
+ }
+
+ if (fbno != agbno || flen != len)
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_allocbt_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_allocbt_xref_other(sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ xfs_scrub_xref_has_no_owner(sc, agbno, len);
+ xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
/* Scrub a bnobt/cntbt record. */
STATIC int
@@ -70,6 +129,8 @@ xfs_scrub_allocbt_rec(
!xfs_verify_agbno(mp, agno, bno + len - 1))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_allocbt_xref(bs->sc, bno, len);
+
return error;
}
@@ -100,3 +161,23 @@ xfs_scrub_cntbt(
{
return xfs_scrub_allocbt(sc, XFS_BTNUM_CNT);
}
+
+/* xref check that the extent is not free */
+void
+xfs_scrub_xref_is_used_space(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ bool is_freesp;
+ int error;
+
+ if (!sc->sa.bno_cur)
+ return;
+
+ error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.bno_cur))
+ return;
+ if (is_freesp)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.bno_cur, 0);
+}
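
All of these xfs_scrub_xref_* helpers follow the same recipe: bail quietly when the needed cursor was never set up, run the query, filter the error through xfs_scrub_should_check_xref() (which also retires the cursor on failure), and record a cross-referencing discrepancy instead of failing the scrub outright. The xfs_scrub_xref_has_no_owner() used earlier could plausibly be built the same way on top of xfs_rmap_has_record(); a sketch under that assumption, not necessarily the actual implementation:

/* xref check that there are no rmappings for this extent */
void
xfs_scrub_xref_has_no_owner(
	struct xfs_scrub_context	*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	bool				has_rmap;
	int				error;

	if (!sc->sa.rmap_cur)
		return;

	error = xfs_rmap_has_record(sc->sa.rmap_cur, agbno, len, &has_rmap);
	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (has_rmap)
		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}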
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 42fec0bcd9e1..d00282130492 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -37,6 +37,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -99,6 +100,201 @@ struct xfs_scrub_bmap_info {
int whichfork;
};
+/* Look for a corresponding rmap for this irec. */
+static inline bool
+xfs_scrub_bmap_get_rmap(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_bmbt_irec *irec,
+ xfs_agblock_t agbno,
+ uint64_t owner,
+ struct xfs_rmap_irec *rmap)
+{
+ xfs_fileoff_t offset;
+ unsigned int rflags = 0;
+ int has_rmap;
+ int error;
+
+ if (info->whichfork == XFS_ATTR_FORK)
+ rflags |= XFS_RMAP_ATTR_FORK;
+
+ /*
+ * CoW staging extents are owned (on disk) by the refcountbt, so
+ * their rmaps do not have offsets.
+ */
+ if (info->whichfork == XFS_COW_FORK)
+ offset = 0;
+ else
+ offset = irec->br_startoff;
+
+ /*
+ * If the caller thinks this could be a shared bmbt extent (IOWs,
+ * any data fork extent of a reflink inode) then we have to use the
+ * range rmap lookup to make sure we get the correct owner/offset.
+ */
+ if (info->is_shared) {
+ error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
+ owner, offset, rflags, rmap, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+ goto out;
+ }
+
+ /*
+ * Otherwise, use the (faster) regular lookup.
+ */
+ error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
+ offset, rflags, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+ if (!has_rmap)
+ goto out;
+
+ error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
+ if (!xfs_scrub_should_check_xref(info->sc, &error,
+ &info->sc->sa.rmap_cur))
+ return false;
+
+out:
+ if (!has_rmap)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ return has_rmap;
+}
+
+/* Make sure that we have rmapbt records for this extent. */
+STATIC void
+xfs_scrub_bmap_xref_rmap(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_bmbt_irec *irec,
+ xfs_agblock_t agbno)
+{
+ struct xfs_rmap_irec rmap;
+ unsigned long long rmap_end;
+ uint64_t owner;
+
+ if (!info->sc->sa.rmap_cur)
+ return;
+
+ if (info->whichfork == XFS_COW_FORK)
+ owner = XFS_RMAP_OWN_COW;
+ else
+ owner = info->sc->ip->i_ino;
+
+ /* Find the rmap record for this irec. */
+ if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ return;
+
+ /* Check the rmap. */
+ rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
+ if (rmap.rm_startblock > agbno ||
+ agbno + irec->br_blockcount > rmap_end)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ /*
+ * Check the logical offsets if applicable. CoW staging extents
+ * don't track logical offsets since the mappings only exist in
+ * memory.
+ */
+ if (info->whichfork != XFS_COW_FORK) {
+ rmap_end = (unsigned long long)rmap.rm_offset +
+ rmap.rm_blockcount;
+ if (rmap.rm_offset > irec->br_startoff ||
+ irec->br_startoff + irec->br_blockcount > rmap_end)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc,
+ info->whichfork, irec->br_startoff);
+ }
+
+ if (rmap.rm_owner != owner)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ /*
+ * Check for discrepancies between the unwritten flag in the irec and
+ * the rmap. Note that the (in-memory) CoW fork distinguishes between
+ * unwritten and written extents, but we don't track that in the rmap
+ * records because the blocks are owned (on-disk) by the refcountbt,
+ * which doesn't track unwritten state.
+ */
+ if (owner != XFS_RMAP_OWN_COW &&
+ irec->br_state == XFS_EXT_UNWRITTEN &&
+ !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+
+ if (info->whichfork == XFS_ATTR_FORK &&
+ !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+ if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
+ xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
+}
+
+/* Cross-reference a single rtdev extent record. */
+STATIC void
+xfs_scrub_bmap_rt_extent_xref(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_inode *ip,
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *irec)
+{
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
+ irec->br_blockcount);
+}
+
+/* Cross-reference a single datadev extent record. */
+STATIC void
+xfs_scrub_bmap_extent_xref(
+ struct xfs_scrub_bmap_info *info,
+ struct xfs_inode *ip,
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = info->sc->mp;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_extlen_t len;
+ int error;
+
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+ len = irec->br_blockcount;
+
+ error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
+ if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
+ irec->br_startoff, &error))
+ return;
+
+ xfs_scrub_xref_is_used_space(info->sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
+ xfs_scrub_bmap_xref_rmap(info, irec, agbno);
+ switch (info->whichfork) {
+ case XFS_DATA_FORK:
+ if (xfs_is_reflink_inode(info->sc->ip))
+ break;
+ /* fall through */
+ case XFS_ATTR_FORK:
+ xfs_scrub_xref_is_not_shared(info->sc, agbno,
+ irec->br_blockcount);
+ break;
+ case XFS_COW_FORK:
+ xfs_scrub_xref_is_cow_staging(info->sc, agbno,
+ irec->br_blockcount);
+ break;
+ }
+
+ xfs_scrub_ag_free(info->sc, &info->sc->sa);
+}
+
/* Scrub a single extent record. */
STATIC int
xfs_scrub_bmap_extent(
@@ -109,6 +305,7 @@ xfs_scrub_bmap_extent(
{
struct xfs_mount *mp = info->sc->mp;
struct xfs_buf *bp = NULL;
+ xfs_filblks_t end;
int error = 0;
if (cur)
@@ -136,19 +333,23 @@ xfs_scrub_bmap_extent(
irec->br_startoff);
/* Make sure the extent points to a valid place. */
+ if (irec->br_blockcount > MAXEXTLEN)
+ xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
+ irec->br_startoff);
if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ end = irec->br_startblock + irec->br_blockcount - 1;
if (info->is_rt &&
(!xfs_verify_rtbno(mp, irec->br_startblock) ||
- !xfs_verify_rtbno(mp, irec->br_startblock +
- irec->br_blockcount - 1)))
+ !xfs_verify_rtbno(mp, end)))
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (!info->is_rt &&
(!xfs_verify_fsbno(mp, irec->br_startblock) ||
- !xfs_verify_fsbno(mp, irec->br_startblock +
- irec->br_blockcount - 1)))
+ !xfs_verify_fsbno(mp, end) ||
+ XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
+ XFS_FSB_TO_AGNO(mp, end)))
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -158,6 +359,11 @@ xfs_scrub_bmap_extent(
xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ if (info->is_rt)
+ xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
+ else
+ xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
+
info->lastoff = irec->br_startoff + irec->br_blockcount;
return error;
}
@@ -235,7 +441,6 @@ xfs_scrub_bmap(
struct xfs_ifork *ifp;
xfs_fileoff_t endoff;
struct xfs_iext_cursor icur;
- bool found;
int error = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -314,9 +519,7 @@ xfs_scrub_bmap(
/* Scrub extent records. */
info.lastoff = 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
- for (found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &irec);
- found != 0;
- found = xfs_iext_next_extent(ifp, &icur, &irec)) {
+ for_each_xfs_iext(ifp, &icur, &irec) {
if (xfs_scrub_should_terminate(sc, &error))
break;
if (isnullstartblock(irec.br_startblock))
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index df0766132ace..54218168c8f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -42,12 +42,14 @@
* Check for btree operation errors. See the section about handling
* operational errors in common.c.
*/
-bool
-xfs_scrub_btree_process_error(
+static bool
+__xfs_scrub_btree_process_error(
struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur,
int level,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
if (*error == 0)
return true;
@@ -60,36 +62,80 @@ xfs_scrub_btree_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xfs_scrub_ifork_btree_op_error(sc, cur, level,
- *error, __return_address);
+ *error, ret_ip);
else
trace_xfs_scrub_btree_op_error(sc, cur, level,
- *error, __return_address);
+ *error, ret_ip);
break;
}
return false;
}
+bool
+xfs_scrub_btree_process_error(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xfs_scrub_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_btree_xref_process_error(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level,
+ int *error)
+{
+ return __xfs_scrub_btree_process_error(sc, cur, level, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
/* Record btree block corruption. */
-void
-xfs_scrub_btree_set_corrupt(
+static void
+__xfs_scrub_btree_set_corrupt(
struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur,
- int level)
+ int level,
+ __u32 errflag,
+ void *ret_ip)
{
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xfs_scrub_ifork_btree_error(sc, cur, level,
- __return_address);
+ ret_ip);
else
trace_xfs_scrub_btree_error(sc, cur, level,
- __return_address);
+ ret_ip);
+}
+
+void
+xfs_scrub_btree_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
+ __return_address);
+}
+
+void
+xfs_scrub_btree_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ __xfs_scrub_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
+ __return_address);
}
/*
@@ -268,6 +314,8 @@ xfs_scrub_btree_block_check_sibling(
pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock);
if (!xfs_scrub_btree_ptr_ok(bs, level + 1, pp))
goto out;
+ if (pbp)
+ xfs_scrub_buffer_recheck(bs->sc, pbp);
if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
xfs_scrub_btree_set_corrupt(bs->sc, cur, level);
@@ -315,6 +363,97 @@ out:
return error;
}
+struct check_owner {
+ struct list_head list;
+ xfs_daddr_t daddr;
+ int level;
+};
+
+/*
+ * Make sure this btree block isn't in the free list and that there's
+ * an rmap record for it.
+ */
+STATIC int
+xfs_scrub_btree_check_block_owner(
+ struct xfs_scrub_btree *bs,
+ int level,
+ xfs_daddr_t daddr)
+{
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ xfs_btnum_t btnum;
+ bool init_sa;
+ int error = 0;
+
+ if (!bs->cur)
+ return 0;
+
+ btnum = bs->cur->bc_btnum;
+ agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
+ agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
+
+ init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
+ if (init_sa) {
+ error = xfs_scrub_ag_init(bs->sc, agno, &bs->sc->sa);
+ if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur,
+ level, &error))
+ return error;
+ }
+
+ xfs_scrub_xref_is_used_space(bs->sc, agbno, 1);
+ /*
+ * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
+ * have to nullify it (to shut down further block owner checks) if
+ * self-xref encounters problems.
+ */
+ if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
+ bs->cur = NULL;
+
+ xfs_scrub_xref_is_owned_by(bs->sc, agbno, 1, bs->oinfo);
+ if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
+ bs->cur = NULL;
+
+ if (init_sa)
+ xfs_scrub_ag_free(bs->sc, &bs->sc->sa);
+
+ return error;
+}
+
+/* Check the owner of a btree block. */
+STATIC int
+xfs_scrub_btree_check_owner(
+ struct xfs_scrub_btree *bs,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_cur *cur = bs->cur;
+ struct check_owner *co;
+
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL)
+ return 0;
+
+ /*
+ * We want to cross-reference each btree block with the bnobt
+ * and the rmapbt. We cannot cross-reference the bnobt or
+ * rmapbt while scanning the bnobt or rmapbt, respectively,
+ * because we cannot alter the cursor and we'd prefer not to
+ * duplicate cursors. Therefore, save the buffer daddr for
+ * later scanning.
+ */
+ if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+ co = kmem_alloc(sizeof(struct check_owner),
+ KM_MAYFAIL | KM_NOFS);
+ if (!co)
+ return -ENOMEM;
+ co->level = level;
+ co->daddr = XFS_BUF_ADDR(bp);
+ list_add_tail(&co->list, &bs->to_check);
+ return 0;
+ }
+
+ return xfs_scrub_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp));
+}
+
/*
* Grab and scrub a btree block given a btree pointer. Returns block
* and buffer pointers (if applicable) if they're ok to use.
@@ -349,6 +488,16 @@ xfs_scrub_btree_get_block(
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
}
+ if (*pbp)
+ xfs_scrub_buffer_recheck(bs->sc, *pbp);
+
+ /*
+ * Check the block's owner; this function absorbs error codes
+ * for us.
+ */
+ error = xfs_scrub_btree_check_owner(bs, level, *pbp);
+ if (error)
+ return error;
/*
* Check the block's siblings; this function absorbs error codes
@@ -421,6 +570,8 @@ xfs_scrub_btree(
struct xfs_btree_block *block;
int level;
struct xfs_buf *bp;
+ struct check_owner *co;
+ struct check_owner *n;
int i;
int error = 0;
@@ -512,5 +663,14 @@ xfs_scrub_btree(
}
out:
+ /* Process deferred owner checks on btree blocks. */
+ list_for_each_entry_safe(co, n, &bs.to_check, list) {
+ if (!error && bs.cur)
+ error = xfs_scrub_btree_check_block_owner(&bs,
+ co->level, co->daddr);
+ list_del(&co->list);
+ kmem_free(co);
+ }
+
return error;
}
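
[Editor's note: xfs_scrub_btree_check_owner and the drain loop at the end of xfs_scrub_btree follow a general deferral pattern: while a cursor is busy walking a tree you cannot re-query that same tree, so you queue a small record per block and run the checks once the walk finishes. The sketch below is a hedged userspace miniature of that pattern; the kernel uses list_head and kmem_alloc, and this sketch substitutes a plain singly linked list with invented names.]

    #include <stdio.h>
    #include <stdlib.h>

    /* One deferred check, like struct check_owner above. */
    struct deferred {
        struct deferred *next;
        unsigned long long daddr;
        int level;
    };

    /* Queue a check instead of running it while the walk is in progress. */
    static int defer_check(struct deferred **head, unsigned long long daddr,
                           int level)
    {
        struct deferred *d = malloc(sizeof(*d));

        if (!d)
            return -1;          /* like -ENOMEM */
        d->daddr = daddr;
        d->level = level;
        d->next = *head;
        *head = d;
        return 0;
    }

    /* After the walk: run each deferred check, then free its record. */
    static void drain_checks(struct deferred **head)
    {
        while (*head) {
            struct deferred *d = *head;

            *head = d->next;
            printf("checking owner of daddr %llu (level %d)\n",
                   d->daddr, d->level);
            free(d);
        }
    }

    int main(void)
    {
        struct deferred *to_check = NULL;

        defer_check(&to_check, 1024, 0);  /* during the btree walk */
        defer_check(&to_check, 2048, 1);
        drain_checks(&to_check);          /* once the cursor is free */
        return 0;
    }
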
diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
index 4de825a626d1..e2b868ede70b 100644
--- a/fs/xfs/scrub/btree.h
+++ b/fs/xfs/scrub/btree.h
@@ -26,10 +26,19 @@
bool xfs_scrub_btree_process_error(struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur, int level, int *error);
+/* Check for btree xref operation errors. */
+bool xfs_scrub_btree_xref_process_error(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level,
+ int *error);
+
/* Check for btree corruption. */
void xfs_scrub_btree_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_btree_cur *cur, int level);
+/* Check for btree xref discrepancies. */
+void xfs_scrub_btree_xref_set_corrupt(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur, int level);
+
struct xfs_scrub_btree;
typedef int (*xfs_scrub_btree_rec_fn)(
struct xfs_scrub_btree *bs,
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ac95fe911d96..8033ab9d8f47 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -78,12 +78,14 @@
*/
/* Check for operational errors. */
-bool
-xfs_scrub_process_error(
+static bool
+__xfs_scrub_process_error(
struct xfs_scrub_context *sc,
xfs_agnumber_t agno,
xfs_agblock_t bno,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
switch (*error) {
case 0:
@@ -95,24 +97,48 @@ xfs_scrub_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
trace_xfs_scrub_op_error(sc, agno, bno, *error,
- __return_address);
+ ret_ip);
break;
}
return false;
}
-/* Check for operational errors for a file offset. */
bool
-xfs_scrub_fblock_process_error(
+xfs_scrub_process_error(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ int *error)
+{
+ return __xfs_scrub_process_error(sc, agno, bno, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_xref_process_error(
+ struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno,
+ xfs_agblock_t bno,
+ int *error)
+{
+ return __xfs_scrub_process_error(sc, agno, bno, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
+/* Check for operational errors for a file offset. */
+static bool
+__xfs_scrub_fblock_process_error(
struct xfs_scrub_context *sc,
int whichfork,
xfs_fileoff_t offset,
- int *error)
+ int *error,
+ __u32 errflag,
+ void *ret_ip)
{
switch (*error) {
case 0:
@@ -124,17 +150,39 @@ xfs_scrub_fblock_process_error(
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
- sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ sc->sm->sm_flags |= errflag;
*error = 0;
/* fall through */
default:
trace_xfs_scrub_file_op_error(sc, whichfork, offset, *error,
- __return_address);
+ ret_ip);
break;
}
return false;
}
+bool
+xfs_scrub_fblock_process_error(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset,
+ int *error)
+{
+ return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
+xfs_scrub_fblock_xref_process_error(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset,
+ int *error)
+{
+ return __xfs_scrub_fblock_process_error(sc, whichfork, offset, error,
+ XFS_SCRUB_OFLAG_XFAIL, __return_address);
+}
+
/*
* Handling scrub corruption/optimization/warning checks.
*
@@ -183,6 +231,16 @@ xfs_scrub_block_set_corrupt(
trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
}
+/* Record a corruption while cross-referencing. */
+void
+xfs_scrub_block_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_block_error(sc, bp->b_bn, __return_address);
+}
+
/*
* Record a corrupt inode. The trace data will include the block given
* by bp if bp is given; otherwise it will use the block location of the
@@ -198,6 +256,17 @@ xfs_scrub_ino_set_corrupt(
trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
}
+/* Record a corruption while cross-referencing with an inode. */
+void
+xfs_scrub_ino_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_buf *bp)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+}
+
/* Record corruption in a block indexed by a file fork. */
void
xfs_scrub_fblock_set_corrupt(
@@ -209,6 +278,17 @@ xfs_scrub_fblock_set_corrupt(
trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
}
+/* Record a corruption while cross-referencing a fork block. */
+void
+xfs_scrub_fblock_xref_set_corrupt(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_fileoff_t offset)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
+ trace_xfs_scrub_fblock_error(sc, whichfork, offset, __return_address);
+}
+
/*
* Warn about inodes that need administrative review but are not
* incorrect.
@@ -245,6 +325,59 @@ xfs_scrub_set_incomplete(
}
/*
+ * rmap scrubbing -- compute the number of blocks with a given owner,
+ * at least according to the reverse mapping data.
+ */
+
+struct xfs_scrub_rmap_ownedby_info {
+ struct xfs_owner_info *oinfo;
+ xfs_filblks_t *blocks;
+};
+
+STATIC int
+xfs_scrub_count_rmap_ownedby_irec(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_scrub_rmap_ownedby_info *sroi = priv;
+ bool irec_attr;
+ bool oinfo_attr;
+
+ irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
+ oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
+
+ if (rec->rm_owner != sroi->oinfo->oi_owner)
+ return 0;
+
+ if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
+ (*sroi->blocks) += rec->rm_blockcount;
+
+ return 0;
+}
+
+/*
+ * Calculate the number of blocks the rmap thinks are owned by something.
+ * The caller should pass us an rmapbt cursor.
+ */
+int
+xfs_scrub_count_rmap_ownedby_ag(
+ struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t *blocks)
+{
+ struct xfs_scrub_rmap_ownedby_info sroi;
+
+ sroi.oinfo = oinfo;
+ *blocks = 0;
+ sroi.blocks = blocks;
+
+ return xfs_rmap_query_all(cur, xfs_scrub_count_rmap_ownedby_irec,
+ &sroi);
+}
+
+/*
* AG scrubbing
*
* These helpers facilitate locking an allocation group's header
@@ -302,7 +435,7 @@ xfs_scrub_ag_read_headers(
error = xfs_alloc_read_agfl(mp, sc->tp, agno, agfl);
if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
goto out;
-
+ error = 0;
out:
return error;
}
@@ -472,7 +605,7 @@ xfs_scrub_setup_ag_btree(
return error;
}
- error = xfs_scrub_setup_ag_header(sc, ip);
+ error = xfs_scrub_setup_fs(sc, ip);
if (error)
return error;
@@ -503,18 +636,11 @@ xfs_scrub_get_inode(
struct xfs_scrub_context *sc,
struct xfs_inode *ip_in)
{
+ struct xfs_imap imap;
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = NULL;
int error;
- /*
- * If userspace passed us an AG number or a generation number
- * without an inode number, they haven't got a clue so bail out
- * immediately.
- */
- if (sc->sm->sm_agno || (sc->sm->sm_gen && !sc->sm->sm_ino))
- return -EINVAL;
-
/* We want to scan the inode we already had opened. */
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
sc->ip = ip_in;
@@ -526,10 +652,33 @@ xfs_scrub_get_inode(
return -ENOENT;
error = xfs_iget(mp, NULL, sc->sm->sm_ino,
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
- if (error == -ENOENT || error == -EINVAL) {
- /* inode doesn't exist... */
- return -ENOENT;
- } else if (error) {
+ switch (error) {
+ case -ENOENT:
+ /* Inode doesn't exist, just bail out. */
+ return error;
+ case 0:
+ /* Got an inode, continue. */
+ break;
+ case -EINVAL:
+ /*
+ * -EINVAL with IGET_UNTRUSTED could mean one of several
+ * things: userspace gave us an inode number that doesn't
+ * correspond to fs space, or doesn't have an inobt entry;
+ * or it could simply mean that the inode buffer failed the
+ * read verifiers.
+ *
+ * Try just the inode mapping lookup -- if it succeeds, then
+ * the inode buffer verifier failed and something needs fixing.
+ * Otherwise, we really couldn't find it so tell userspace
+ * that it no longer exists.
+ */
+ error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap,
+ XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
+ if (error)
+ return -ENOENT;
+ error = -EFSCORRUPTED;
+ /* fall through */
+ default:
trace_xfs_scrub_op_error(sc,
XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
@@ -572,3 +721,61 @@ out:
/* scrub teardown will unlock and release the inode for us */
return error;
}
+
+/*
+ * Predicate that decides if we need to evaluate the cross-reference check.
+ * If there was an error accessing the cross-reference btree, just delete
+ * the cursor and skip the check.
+ */
+bool
+xfs_scrub_should_check_xref(
+ struct xfs_scrub_context *sc,
+ int *error,
+ struct xfs_btree_cur **curpp)
+{
+ if (*error == 0)
+ return true;
+
+ if (curpp) {
+ /* If we've already given up on xref, just bail out. */
+ if (!*curpp)
+ return false;
+
+ /* xref error, delete cursor and bail out. */
+ xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
+ *curpp = NULL;
+ }
+
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
+ trace_xfs_scrub_xref_error(sc, *error, __return_address);
+
+ /*
+ * Errors encountered during cross-referencing with another
+ * data structure should not cause this scrubber to abort.
+ */
+ *error = 0;
+ return false;
+}
+
+/* Run the structure verifiers on in-memory buffers to detect bad memory. */
+void
+xfs_scrub_buffer_recheck(
+ struct xfs_scrub_context *sc,
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (bp->b_ops == NULL) {
+ xfs_scrub_block_set_corrupt(sc, bp);
+ return;
+ }
+ if (bp->b_ops->verify_struct == NULL) {
+ xfs_scrub_set_incomplete(sc);
+ return;
+ }
+ fa = bp->b_ops->verify_struct(bp);
+ if (!fa)
+ return;
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
+}
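
[Editor's note: xfs_scrub_buffer_recheck leans on the b_ops->verify_struct hook: the same structural checks that ran when the buffer was read can be replayed on the cached copy to catch in-memory corruption. Below is a hedged sketch of that hook shape with hypothetical names; the real xfs_buf_ops also carry read/write verifiers, which are omitted here.]

    #include <stdio.h>

    struct buf;

    /*
     * A verifier table in the style of xfs_buf_ops: verify_struct
     * returns the failing address, or NULL if the structure is sane.
     */
    struct buf_ops {
        const char *name;
        void *(*verify_struct)(struct buf *bp);
    };

    struct buf {
        const struct buf_ops *ops;
        unsigned int magic;     /* stand-in for on-disk fields */
    };

    static void *demo_verify_struct(struct buf *bp)
    {
        static char failaddr;   /* stand-in for the failing code address */

        if (bp->magic != 0x58465342)    /* "XFSB" */
            return &failaddr;
        return NULL;
    }

    static const struct buf_ops demo_ops = {
        .name          = "demo",
        .verify_struct = demo_verify_struct,
    };

    /* Replay the structural checks on a buffer we already hold. */
    static void buffer_recheck(struct buf *bp)
    {
        if (!bp->ops || !bp->ops->verify_struct) {
            printf("cannot recheck\n");   /* corrupt or incomplete */
            return;
        }
        if (bp->ops->verify_struct(bp))
            printf("in-memory corruption detected\n");
    }

    int main(void)
    {
        struct buf good = { .ops = &demo_ops, .magic = 0x58465342 };
        struct buf bad  = { .ops = &demo_ops, .magic = 0 };

        buffer_recheck(&good);
        buffer_recheck(&bad);
        return 0;
    }
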
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 5c043855570e..ddb65d22c76a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -56,6 +56,11 @@ bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset, int *error);
+bool xfs_scrub_xref_process_error(struct xfs_scrub_context *sc,
+ xfs_agnumber_t agno, xfs_agblock_t bno, int *error);
+bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
+ int whichfork, xfs_fileoff_t offset, int *error);
+
void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
@@ -68,6 +73,13 @@ void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);
+void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
+ struct xfs_buf *bp);
+void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
+ struct xfs_buf *bp);
+void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
+ int whichfork, xfs_fileoff_t offset);
+
void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
@@ -76,10 +88,12 @@ void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
void xfs_scrub_set_incomplete(struct xfs_scrub_context *sc);
int xfs_scrub_checkpoint_log(struct xfs_mount *mp);
+/* Are we set up for a cross-referencing check? */
+bool xfs_scrub_should_check_xref(struct xfs_scrub_context *sc, int *error,
+ struct xfs_btree_cur **curpp);
+
/* Setup functions */
int xfs_scrub_setup_fs(struct xfs_scrub_context *sc, struct xfs_inode *ip);
-int xfs_scrub_setup_ag_header(struct xfs_scrub_context *sc,
- struct xfs_inode *ip);
int xfs_scrub_setup_ag_allocbt(struct xfs_scrub_context *sc,
struct xfs_inode *ip);
int xfs_scrub_setup_ag_iallocbt(struct xfs_scrub_context *sc,
@@ -134,11 +148,16 @@ int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
void *),
void *priv);
+int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
+ struct xfs_btree_cur *cur,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t *blocks);
int xfs_scrub_setup_ag_btree(struct xfs_scrub_context *sc,
struct xfs_inode *ip, bool force_log);
int xfs_scrub_get_inode(struct xfs_scrub_context *sc, struct xfs_inode *ip_in);
int xfs_scrub_setup_inode_contents(struct xfs_scrub_context *sc,
struct xfs_inode *ip, unsigned int resblks);
+void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index d94edd93cba8..bffdb7dc09bf 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -233,11 +233,28 @@ xfs_scrub_da_btree_write_verify(
return;
}
}
+static void *
+xfs_scrub_da_btree_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DIR2_LEAF1_MAGIC:
+ case XFS_DIR3_LEAF1_MAGIC:
+ bp->b_ops = &xfs_dir3_leaf1_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ default:
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ return bp->b_ops->verify_struct(bp);
+ }
+}
static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
.name = "xfs_scrub_da_btree",
.verify_read = xfs_scrub_da_btree_read_verify,
.verify_write = xfs_scrub_da_btree_write_verify,
+ .verify_struct = xfs_scrub_da_btree_verify,
};
/* Check a block's sibling. */
@@ -276,6 +293,9 @@ xfs_scrub_da_btree_block_check_sibling(
xfs_scrub_da_set_corrupt(ds, level);
return error;
}
+ if (ds->state->altpath.blk[level].bp)
+ xfs_scrub_buffer_recheck(ds->sc,
+ ds->state->altpath.blk[level].bp);
/* Compare upper level pointer to sibling pointer. */
if (ds->state->altpath.blk[level].blkno != sibling)
@@ -358,6 +378,8 @@ xfs_scrub_da_btree_block(
&xfs_scrub_da_btree_buf_ops);
if (!xfs_scrub_da_process_error(ds, level, &error))
goto out_nobuf;
+ if (blk->bp)
+ xfs_scrub_buffer_recheck(ds->sc, blk->bp);
/*
* We didn't find a dir btree root block, which means that
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 69e1efdd4019..50b6a26b0299 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -92,7 +92,7 @@ xfs_scrub_dir_check_ftype(
* inodes can trigger immediate inactive cleanup of the inode.
*/
error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
- if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
+ if (!xfs_scrub_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
&error))
goto out;
@@ -200,6 +200,7 @@ xfs_scrub_dir_rec(
struct xfs_inode *dp = ds->dargs.dp;
struct xfs_dir2_data_entry *dent;
struct xfs_buf *bp;
+ char *p, *endp;
xfs_ino_t ino;
xfs_dablk_t rec_bno;
xfs_dir2_db_t db;
@@ -237,9 +238,37 @@ xfs_scrub_dir_rec(
xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
goto out;
}
+ xfs_scrub_buffer_recheck(ds->sc, bp);
- /* Retrieve the entry, sanity check it, and compare hashes. */
dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+
+ /* Make sure we got a real directory entry. */
+ p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
+ endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
+ if (!endp) {
+ xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+ goto out_relse;
+ }
+ while (p < endp) {
+ struct xfs_dir2_data_entry *dep;
+ struct xfs_dir2_data_unused *dup;
+
+ dup = (struct xfs_dir2_data_unused *)p;
+ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+ p += be16_to_cpu(dup->length);
+ continue;
+ }
+ dep = (struct xfs_dir2_data_entry *)p;
+ if (dep == dent)
+ break;
+ p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
+ }
+ if (p >= endp) {
+ xfs_scrub_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+ goto out_relse;
+ }
+
+ /* Retrieve the entry, sanity check it, and compare hashes. */
ino = be64_to_cpu(dent->inumber);
hash = be32_to_cpu(ent->hashval);
tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
@@ -324,6 +353,7 @@ xfs_scrub_directory_data_bestfree(
}
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
@@ -361,13 +391,7 @@ xfs_scrub_directory_data_bestfree(
/* Make sure the bestfrees are actually the best free spaces. */
ptr = (char *)d_ops->data_entry_p(bp->b_addr);
- if (is_block) {
- struct xfs_dir2_block_tail *btp;
-
- btp = xfs_dir2_block_tail_p(mp->m_dir_geo, bp->b_addr);
- endptr = (char *)xfs_dir2_block_leaf_p(btp);
- } else
- endptr = (char *)bp->b_addr + BBTOB(bp->b_length);
+ endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
/* Iterate the entries, stopping when we hit or go past the end. */
while (ptr < endptr) {
@@ -474,6 +498,7 @@ xfs_scrub_directory_leaf1_bestfree(
error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
leaf = bp->b_addr;
d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
@@ -559,6 +584,7 @@ xfs_scrub_directory_free_bestfree(
error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
+ xfs_scrub_buffer_recheck(sc, bp);
if (xfs_sb_version_hascrc(&sc->mp->m_sb)) {
struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 496d6f2fbb9e..63ab3f98430d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -58,6 +58,56 @@ xfs_scrub_setup_ag_iallocbt(
/* Inode btree scrubber. */
+/*
+ * If we're checking the finobt, cross-reference with the inobt.
+ * Otherwise we're checking the inobt; if there is a finobt, make sure
+ * it has (or lacks) a record for this chunk depending on the freecount.
+ */
+static inline void
+xfs_scrub_iallocbt_chunk_xref_other(
+ struct xfs_scrub_context *sc,
+ struct xfs_inobt_rec_incore *irec,
+ xfs_agino_t agino)
+{
+ struct xfs_btree_cur **pcur;
+ bool has_irec;
+ int error;
+
+ if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
+ pcur = &sc->sa.ino_cur;
+ else
+ pcur = &sc->sa.fino_cur;
+ if (!(*pcur))
+ return;
+ error = xfs_ialloc_has_inode_record(*pcur, agino, agino, &has_irec);
+ if (!xfs_scrub_should_check_xref(sc, &error, pcur))
+ return;
+ if ((irec->ir_freecount > 0 && !has_irec) ||
+     (irec->ir_freecount == 0 && has_irec))
+ xfs_scrub_btree_xref_set_corrupt(sc, *pcur, 0);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_iallocbt_chunk_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_inobt_rec_incore *irec,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_owner_info oinfo;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ xfs_scrub_iallocbt_chunk_xref_other(sc, irec, agino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ xfs_scrub_xref_is_owned_by(sc, agbno, len, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, len);
+}
+
/* Is this chunk worth checking? */
STATIC bool
xfs_scrub_iallocbt_chunk(
@@ -76,6 +126,8 @@ xfs_scrub_iallocbt_chunk(
!xfs_verify_agbno(mp, agno, bno + len - 1))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len);
+
return true;
}
@@ -190,8 +242,14 @@ xfs_scrub_iallocbt_check_freemask(
}
/* If any part of this is a hole, skip it. */
- if (ir_holemask)
+ if (ir_holemask) {
+ xfs_scrub_xref_is_not_owned_by(bs->sc, agbno,
+ blks_per_cluster, &oinfo);
continue;
+ }
+
+ xfs_scrub_xref_is_owned_by(bs->sc, agbno, blks_per_cluster,
+ &oinfo);
/* Grab the inode cluster buffer. */
imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
@@ -227,6 +285,7 @@ xfs_scrub_iallocbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
+ xfs_filblks_t *inode_blocks = bs->private;
struct xfs_inobt_rec_incore irec;
uint64_t holes;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
@@ -264,6 +323,9 @@ xfs_scrub_iallocbt_rec(
(agbno & (xfs_icluster_size_fsb(mp) - 1)))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ *inode_blocks += XFS_B_TO_FSB(mp,
+ irec.ir_count * mp->m_sb.sb_inodesize);
+
/* Handle non-sparse inodes */
if (!xfs_inobt_issparse(irec.ir_holemask)) {
len = XFS_B_TO_FSB(mp,
@@ -308,6 +370,72 @@ out:
return error;
}
+/*
+ * Make sure the inode btrees are as large as the rmap thinks they are.
+ * Don't bother if we're missing btree cursors, as we're already corrupt.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_btreeblks(
+ struct xfs_scrub_context *sc,
+ int which)
+{
+ struct xfs_owner_info oinfo;
+ xfs_filblks_t blocks;
+ xfs_extlen_t inobt_blocks = 0;
+ xfs_extlen_t finobt_blocks = 0;
+ int error;
+
+ if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
+ (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur))
+ return;
+
+ /* Check that we saw as many inobt blocks as the rmap says. */
+ error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
+ if (!xfs_scrub_process_error(sc, 0, 0, &error))
+ return;
+
+ if (sc->sa.fino_cur) {
+ error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
+ if (!xfs_scrub_process_error(sc, 0, 0, &error))
+ return;
+ }
+
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != inobt_blocks + finobt_blocks)
+ xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
+/*
+ * Make sure that the inobt records point to the same number of blocks as
+ * the rmap says are owned by inodes.
+ */
+STATIC void
+xfs_scrub_iallocbt_xref_rmap_inodes(
+ struct xfs_scrub_context *sc,
+ int which,
+ xfs_filblks_t inode_blocks)
+{
+ struct xfs_owner_info oinfo;
+ xfs_filblks_t blocks;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Check that we saw as many inode blocks as the rmap knows about. */
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, &oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != inode_blocks)
+ xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+}
+
/* Scrub the inode btrees for some AG. */
STATIC int
xfs_scrub_iallocbt(
@@ -316,10 +444,29 @@ xfs_scrub_iallocbt(
{
struct xfs_btree_cur *cur;
struct xfs_owner_info oinfo;
+ xfs_filblks_t inode_blocks = 0;
+ int error;
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
cur = which == XFS_BTNUM_INO ? sc->sa.ino_cur : sc->sa.fino_cur;
- return xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo, NULL);
+ error = xfs_scrub_btree(sc, cur, xfs_scrub_iallocbt_rec, &oinfo,
+ &inode_blocks);
+ if (error)
+ return error;
+
+ xfs_scrub_iallocbt_xref_rmap_btreeblks(sc, which);
+
+ /*
+ * If we're scrubbing the inode btree, inode_blocks is the number of
+ * blocks pointed to by all the inode chunk records. Therefore, we
+ * should compare to the number of inode chunk blocks that the rmap
+ * knows about. We can't do this for the finobt since it only points
+ * to inode chunks with free inodes.
+ */
+ if (which == XFS_BTNUM_INO)
+ xfs_scrub_iallocbt_xref_rmap_inodes(sc, which, inode_blocks);
+
+ return error;
}
int
@@ -335,3 +482,46 @@ xfs_scrub_finobt(
{
return xfs_scrub_iallocbt(sc, XFS_BTNUM_FINO);
}
+
+/* See if an inode btree has (or doesn't have) an inode chunk record. */
+static inline void
+xfs_scrub_xref_inode_check(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ struct xfs_btree_cur **icur,
+ bool should_have_inodes)
+{
+ bool has_inodes;
+ int error;
+
+ if (!(*icur))
+ return;
+
+ error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
+ if (!xfs_scrub_should_check_xref(sc, &error, icur))
+ return;
+ if (has_inodes != should_have_inodes)
+ xfs_scrub_btree_xref_set_corrupt(sc, *icur, 0);
+}
+
+/* xref check that the extent is not covered by inodes */
+void
+xfs_scrub_xref_is_not_inode_chunk(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, false);
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur, false);
+}
+
+/* xref check that the extent is covered by inodes */
+void
+xfs_scrub_xref_is_inode_chunk(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ xfs_scrub_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur, true);
+}
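
[Editor's note: the inobt/finobt cross-check in xfs_scrub_iallocbt_chunk_xref_other comes down to one rule: a chunk appears in the finobt exactly when it still has free inodes. A compact sketch of that rule follows, not from the patch; the types are hypothetical.]

    #include <stdbool.h>
    #include <stdio.h>

    struct chunk {
        unsigned int freecount; /* free inodes in this chunk */
        bool in_finobt;         /* does the finobt have a record? */
    };

    /* The invariant checked above: finobt membership iff freecount > 0. */
    static bool finobt_consistent(const struct chunk *c)
    {
        return (c->freecount > 0) == c->in_finobt;
    }

    int main(void)
    {
        struct chunk full  = { .freecount = 0, .in_finobt = false };
        struct chunk stale = { .freecount = 3, .in_finobt = false };

        printf("%d %d\n", finobt_consistent(&full),
                          finobt_consistent(&stale));   /* prints: 1 0 */
        return 0;
    }
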
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 637b7a892313..21297bef8df1 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -36,9 +36,13 @@
#include "xfs_ialloc.h"
#include "xfs_da_format.h"
#include "xfs_reflink.h"
+#include "xfs_rmap.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/btree.h"
#include "scrub/trace.h"
/*
@@ -64,7 +68,7 @@ xfs_scrub_setup_inode(
break;
case -EFSCORRUPTED:
case -EFSBADCRC:
- return 0;
+ return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
default:
return error;
}
@@ -318,8 +322,20 @@ xfs_scrub_dinode(
/* di_mode */
mode = be16_to_cpu(dip->di_mode);
- if (mode & ~(S_IALLUGO | S_IFMT))
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ case S_IFREG:
+ case S_IFDIR:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
+ /* mode is recognized */
+ break;
+ default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ break;
+ }
/* v1/v2 fields */
switch (dip->di_version) {
@@ -380,6 +396,14 @@ xfs_scrub_dinode(
break;
}
+ /* di_[amc]time.nsec */
+ if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+
/*
* di_size. xfs_dinode_verify checks for things that screw up
* the VFS such as the upper bit being set and zero-length
@@ -483,6 +507,8 @@ xfs_scrub_dinode(
}
if (dip->di_version >= 3) {
+ if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
flags2);
@@ -534,7 +560,7 @@ xfs_scrub_inode_map_raw(
*/
bp->b_ops = &xfs_inode_buf_ops;
dip = xfs_buf_offset(bp, imap.im_boffset);
- if (!xfs_dinode_verify(mp, ino, dip) ||
+ if (xfs_dinode_verify(mp, ino, dip) != NULL ||
!xfs_dinode_good_version(mp, dip->di_version)) {
xfs_scrub_ino_set_corrupt(sc, ino, bp);
goto out_buf;
@@ -555,18 +581,155 @@ out_buf:
return error;
}
+/*
+ * Make sure the finobt doesn't think this inode is free.
+ * We don't have to check the inobt ourselves because we got the inode via
+ * IGET_UNTRUSTED, which checks the inobt for us.
+ */
+static void
+xfs_scrub_inode_xref_finobt(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino)
+{
+ struct xfs_inobt_rec_incore rec;
+ xfs_agino_t agino;
+ int has_record;
+ int error;
+
+ if (!sc->sa.fino_cur)
+ return;
+
+ agino = XFS_INO_TO_AGINO(sc->mp, ino);
+
+ /*
+ * Try to get the finobt record. If we can't get it, then we're
+ * in good shape.
+ */
+ error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE,
+ &has_record);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+ !has_record)
+ return;
+
+ error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) ||
+ !has_record)
+ return;
+
+ /*
+ * Otherwise, make sure this record either doesn't cover this inode,
+ * or that it does but it's marked present.
+ */
+ if (rec.ir_startino > agino ||
+ rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ return;
+
+ if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0);
+}
+
+/* Cross reference the inode fields with the forks. */
+STATIC void
+xfs_scrub_inode_xref_bmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_dinode *dip)
+{
+ xfs_extnum_t nextents;
+ xfs_filblks_t count;
+ xfs_filblks_t acount;
+ int error;
+
+ /* Walk all the extents to check nextents/naextents/nblocks. */
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
+ &nextents, &count);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ return;
+ if (nextents < be32_to_cpu(dip->di_nextents))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+ error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
+ &nextents, &acount);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ return;
+ if (nextents != be16_to_cpu(dip->di_anextents))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+
+ /* Check nblocks against the inode. */
+ if (count + acount != be64_to_cpu(dip->di_nblocks))
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_inode_xref(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_dinode *dip)
+{
+ struct xfs_owner_info oinfo;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ agno = XFS_INO_TO_AGNO(sc->mp, ino);
+ agbno = XFS_INO_TO_AGBNO(sc->mp, ino);
+
+ error = xfs_scrub_ag_init(sc, agno, &sc->sa);
+ if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error))
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, 1);
+ xfs_scrub_inode_xref_finobt(sc, ino);
+ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);
+ xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo);
+ xfs_scrub_xref_is_not_shared(sc, agbno, 1);
+ xfs_scrub_inode_xref_bmap(sc, dip);
+
+ xfs_scrub_ag_free(sc, &sc->sa);
+}
+
+/*
+ * If the reflink iflag disagrees with a scan for shared data fork extents,
+ * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
+ * any shared extents). We already checked for the reflink iflag set on a
+ * non-reflink filesystem.
+ */
+static void
+xfs_scrub_inode_check_reflink_iflag(
+ struct xfs_scrub_context *sc,
+ xfs_ino_t ino,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = sc->mp;
+ bool has_shared;
+ int error;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return;
+
+ error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
+ &has_shared);
+ if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
+ XFS_INO_TO_AGBNO(mp, ino), &error))
+ return;
+ if (xfs_is_reflink_inode(sc->ip) && !has_shared)
+ xfs_scrub_ino_set_preen(sc, ino, bp);
+ else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
+ xfs_scrub_ino_set_corrupt(sc, ino, bp);
+}
+
/* Scrub an inode. */
int
xfs_scrub_inode(
struct xfs_scrub_context *sc)
{
struct xfs_dinode di;
- struct xfs_mount *mp = sc->mp;
struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
xfs_ino_t ino;
-
- bool has_shared;
int error = 0;
/* Did we get the in-core inode, or are we doing this manually? */
@@ -591,19 +754,14 @@ xfs_scrub_inode(
goto out;
/*
- * Does this inode have the reflink flag set but no shared extents?
- * Set the preening flag if this is the case.
+ * Look for discrepancies between file's data blocks and the reflink
+ * iflag. We already checked the iflag against the file mode when
+ * we scrubbed the dinode.
*/
- if (xfs_is_reflink_inode(sc->ip)) {
- error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
- &has_shared);
- if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
- XFS_INO_TO_AGBNO(mp, ino), &error))
- goto out;
- if (!has_shared)
- xfs_scrub_ino_set_preen(sc, ino, bp);
- }
+ if (S_ISREG(VFS_I(sc->ip)->i_mode))
+ xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
+ xfs_scrub_inode_xref(sc, ino, dip);
out:
if (bp)
xfs_trans_brelse(sc->tp, bp);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 63a25334fc83..0d3851410c74 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -169,9 +169,9 @@ xfs_scrub_parent_validate(
* immediate inactive cleanup of the inode.
*/
error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out;
- if (dp == sc->ip) {
+ if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out_rele;
}
@@ -185,7 +185,7 @@ xfs_scrub_parent_validate(
*/
if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
&error))
goto out_unlock;
if (nlink != expected_nlink)
@@ -205,7 +205,7 @@ xfs_scrub_parent_validate(
/* Go looking for our dentry. */
error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
- if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
+ if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out_unlock;
/* Drop the parent lock, relock this inode. */
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 8e58ba842946..51daa4ae2627 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -67,13 +67,6 @@ xfs_scrub_setup_quota(
{
uint dqtype;
- /*
- * If userspace gave us an AG number or inode data, they don't
- * know what they're doing. Get out.
- */
- if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
-
dqtype = xfs_scrub_quota_to_dqtype(sc);
if (dqtype == 0)
return -EINVAL;
@@ -107,7 +100,7 @@ xfs_scrub_quota_item(
unsigned long long rcount;
xfs_ino_t fs_icount;
- offset = id * qi->qi_dqperchunk;
+ offset = id / qi->qi_dqperchunk;
/*
* We fed $id and DQNEXT into the xfs_qm_dqget call, which means
@@ -207,7 +200,7 @@ xfs_scrub_quota(
xfs_dqid_t id = 0;
uint dqtype;
int nimaps;
- int error;
+ int error = 0;
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return -ENOENT;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 2f88a8d44bd0..400f1561cd3d 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -31,6 +31,7 @@
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -50,6 +51,307 @@ xfs_scrub_setup_ag_refcountbt(
/* Reference count btree scrubber. */
+/*
+ * Confirming Reference Counts via Reverse Mappings
+ *
+ * We want to count the reverse mappings overlapping a refcount record
+ * (bno, len, refcount), allowing for the possibility that some of the
+ * overlap may come from smaller adjoining reverse mappings, while some
+ * comes from single extents which overlap the range entirely. The
+ * outer loop is as follows:
+ *
+ * 1. For all reverse mappings overlapping the refcount extent,
+ * a. If a given rmap completely overlaps, mark it as seen.
+ * b. Otherwise, record the fragment (in agbno order) for later
+ * processing.
+ *
+ * Once we've seen all the rmaps, we know that for all blocks in the
+ * refcount record we want to find $refcount owners and we've already
+ * visited $seen extents that overlap all the blocks. Therefore, we
+ * need to find ($refcount - $seen) owners for every block in the
+ * extent; call that quantity $target_nr. Proceed as follows:
+ *
+ * 2. Pull the first $target_nr fragments from the list; all of them
+ * should start at or before the start of the extent.
+ * Call this subset of fragments the working set.
+ * 3. Until there are no more unprocessed fragments,
+ * a. Find the shortest fragments in the set and remove them.
+ * b. Note the block number of the end of these fragments.
+ * c. Pull the same number of fragments from the list. All of these
+ * fragments should start at the block number recorded in the
+ * previous step.
+ * d. Put those fragments in the set.
+ * 4. Check that there are $target_nr fragments remaining in the list,
+ * and that they all end at or beyond the end of the refcount extent.
+ *
+ * If the refcount is correct, all the check conditions in the algorithm
+ * should always hold true. If not, the refcount is incorrect.
+ */
+struct xfs_scrub_refcnt_frag {
+ struct list_head list;
+ struct xfs_rmap_irec rm;
+};
+
+struct xfs_scrub_refcnt_check {
+ struct xfs_scrub_context *sc;
+ struct list_head fragments;
+
+ /* refcount extent we're examining */
+ xfs_agblock_t bno;
+ xfs_extlen_t len;
+ xfs_nlink_t refcount;
+
+ /* number of owners seen */
+ xfs_nlink_t seen;
+};
+
+/*
+ * Decide if the given rmap is large enough that we can redeem it
+ * towards refcount verification now, or if it's a fragment, in
+ * which case we'll hang onto it in the hopes that we'll later
+ * discover that we've collected exactly as many fragments as the
+ * refcountbt says we should have.
+ */
+STATIC int
+xfs_scrub_refcountbt_rmap_check(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_scrub_refcnt_check *refchk = priv;
+ struct xfs_scrub_refcnt_frag *frag;
+ xfs_agblock_t rm_last;
+ xfs_agblock_t rc_last;
+ int error = 0;
+
+ if (xfs_scrub_should_terminate(refchk->sc, &error))
+ return error;
+
+ rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
+ rc_last = refchk->bno + refchk->len - 1;
+
+ /* Confirm that a single-owner refc extent is a CoW staging extent. */
+ if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
+ xfs_scrub_btree_xref_set_corrupt(refchk->sc, cur, 0);
+ return 0;
+ }
+
+ if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
+ /*
+ * The rmap overlaps the refcount record, so we can confirm
+ * one refcount owner seen.
+ */
+ refchk->seen++;
+ } else {
+ /*
+ * This rmap covers only part of the refcount record, so
+ * save the fragment for later processing. If the rmapbt
+ * is healthy each rmap_irec we see will be in agbno order
+ * so we don't need insertion sort here.
+ */
+ frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
+ KM_MAYFAIL | KM_NOFS);
+ if (!frag)
+ return -ENOMEM;
+ memcpy(&frag->rm, rec, sizeof(frag->rm));
+ list_add_tail(&frag->list, &refchk->fragments);
+ }
+
+ return 0;
+}
+
+/*
+ * Given a bunch of rmap fragments, iterate through them, keeping
+ * a running tally of the refcount. If this ever deviates from
+ * what we expect (which is the refcountbt's refcount minus the
+ * number of extents that totally covered the refcountbt extent),
+ * we have a refcountbt error.
+ */
+STATIC void
+xfs_scrub_refcountbt_process_rmap_fragments(
+ struct xfs_scrub_refcnt_check *refchk)
+{
+ struct list_head worklist;
+ struct xfs_scrub_refcnt_frag *frag;
+ struct xfs_scrub_refcnt_frag *n;
+ xfs_agblock_t bno;
+ xfs_agblock_t rbno;
+ xfs_agblock_t next_rbno;
+ xfs_nlink_t nr;
+ xfs_nlink_t target_nr;
+
+ target_nr = refchk->refcount - refchk->seen;
+ if (target_nr == 0)
+ return;
+
+ /*
+ * There are (refchk->refcount - refchk->seen) references we
+ * haven't found yet. Pull that many off the
+ * fragment list and figure out where the smallest rmap ends
+ * (and therefore the next rmap should start). All the rmaps
+ * we pull off should start at or before the beginning of the
+ * refcount record's range.
+ */
+ INIT_LIST_HEAD(&worklist);
+ rbno = NULLAGBLOCK;
+ nr = 1;
+
+ /* Make sure the fragments actually /are/ in agbno order. */
+ bno = 0;
+ list_for_each_entry(frag, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock < bno)
+ goto done;
+ bno = frag->rm.rm_startblock;
+ }
+
+ /*
+ * Find all the rmaps that start at or before the refc extent,
+ * and put them on the worklist.
+ */
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ if (frag->rm.rm_startblock > refchk->bno)
+ goto done;
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno < rbno)
+ rbno = bno;
+ list_move_tail(&frag->list, &worklist);
+ if (nr == target_nr)
+ break;
+ nr++;
+ }
+
+ /*
+ * We should have found exactly $target_nr rmap fragments starting
+ * at or before the refcount extent.
+ */
+ if (nr != target_nr)
+ goto done;
+
+ while (!list_empty(&refchk->fragments)) {
+ /* Discard any fragments ending at rbno from the worklist. */
+ nr = 0;
+ next_rbno = NULLAGBLOCK;
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (bno != rbno) {
+ if (bno < next_rbno)
+ next_rbno = bno;
+ continue;
+ }
+ list_del(&frag->list);
+ kmem_free(frag);
+ nr++;
+ }
+
+ /* Try to add nr rmaps starting at rbno to the worklist. */
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
+ if (frag->rm.rm_startblock != rbno)
+ goto done;
+ list_move_tail(&frag->list, &worklist);
+ if (next_rbno > bno)
+ next_rbno = bno;
+ nr--;
+ if (nr == 0)
+ break;
+ }
+
+ /*
+ * If we get here and nr > 0, this means that we added fewer
+ * items to the worklist than we discarded because the fragment
+ * list ran out of items. Therefore, we cannot maintain the
+ * required refcount. Something is wrong, so we're done.
+ */
+ if (nr)
+ goto done;
+
+ rbno = next_rbno;
+ }
+
+ /*
+ * Make sure the last extent we processed ends at or beyond
+ * the end of the refcount extent.
+ */
+ if (rbno < refchk->bno + refchk->len)
+ goto done;
+
+ /* Actually record us having seen the remaining refcount. */
+ refchk->seen = refchk->refcount;
+done:
+ /* Delete fragments and work list. */
+ list_for_each_entry_safe(frag, n, &worklist, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+ list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+}
+
+/* Use the rmap entries covering this extent to verify the refcount. */
+STATIC void
+xfs_scrub_refcountbt_xref_rmap(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ xfs_nlink_t refcount)
+{
+ struct xfs_scrub_refcnt_check refchk = {
+ .sc = sc,
+ .bno = bno,
+ .len = len,
+ .refcount = refcount,
+ .seen = 0,
+ };
+ struct xfs_rmap_irec low;
+ struct xfs_rmap_irec high;
+ struct xfs_scrub_refcnt_frag *frag;
+ struct xfs_scrub_refcnt_frag *n;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Cross-reference with the rmapbt to confirm the refcount. */
+ memset(&low, 0, sizeof(low));
+ low.rm_startblock = bno;
+ memset(&high, 0xFF, sizeof(high));
+ high.rm_startblock = bno + len - 1;
+
+ INIT_LIST_HEAD(&refchk.fragments);
+ error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
+ &xfs_scrub_refcountbt_rmap_check, &refchk);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ goto out_free;
+
+ xfs_scrub_refcountbt_process_rmap_fragments(&refchk);
+ if (refcount != refchk.seen)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+out_free:
+ list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
+ list_del(&frag->list);
+ kmem_free(frag);
+ }
+}
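
[Editor's note: the fragment-stitching algorithm described above exists to avoid touching every block; the property it establishes is simply that each block in the refcount record has exactly $refcount rmap owners. Below is a brute-force userspace sketch of that property, useful for reasoning about the algorithm. It is not part of the patch: the types are hypothetical and the intervals are half-open.]

    #include <stdbool.h>
    #include <stdio.h>

    /* A reverse mapping trimmed down to an interval [start, start + len). */
    struct rmap {
        unsigned int start;
        unsigned int len;
    };

    /*
     * Return true iff every block in [bno, bno + len) is covered by
     * exactly 'refcount' of the given rmaps, which is what the fragment
     * walk verifies without iterating blocks one at a time.
     */
    static bool covered_exactly(const struct rmap *r, int nr,
                                unsigned int bno, unsigned int len,
                                unsigned int refcount)
    {
        for (unsigned int b = bno; b < bno + len; b++) {
            unsigned int owners = 0;

            for (int i = 0; i < nr; i++)
                if (r[i].start <= b && b < r[i].start + r[i].len)
                    owners++;
            if (owners != refcount)
                return false;
        }
        return true;
    }

    int main(void)
    {
        /*
         * Two rmaps span all of [10, 20); a third covers only [10, 15).
         * Blocks 10-14 have three owners, 15-19 only two, so a refcount
         * record (bno=10, len=10, refcount=2) must be flagged.
         */
        struct rmap r[] = { { 10, 10 }, { 10, 10 }, { 10, 5 } };

        printf("%d\n", covered_exactly(r, 3, 10, 10, 2));  /* prints: 0 */
        return 0;
    }
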
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_refcountbt_xref(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ xfs_nlink_t refcount)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ xfs_scrub_refcountbt_xref_rmap(sc, agbno, len, refcount);
+}
+
/* Scrub a refcountbt record. */
STATIC int
xfs_scrub_refcountbt_rec(
@@ -57,6 +359,7 @@ xfs_scrub_refcountbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_mount *mp = bs->cur->bc_mp;
+ xfs_agblock_t *cow_blocks = bs->private;
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agblock_t bno;
xfs_extlen_t len;
@@ -72,6 +375,8 @@ xfs_scrub_refcountbt_rec(
has_cowflag = (bno & XFS_REFC_COW_START);
if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ if (has_cowflag)
+ (*cow_blocks) += len;
/* Check the extent. */
bno &= ~XFS_REFC_COW_START;
@@ -83,17 +388,128 @@ xfs_scrub_refcountbt_rec(
if (refcount == 0)
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
+ xfs_scrub_refcountbt_xref(bs->sc, bno, len, refcount);
+
return error;
}
+/* Make sure we have as many refc blocks as the rmap says. */
+STATIC void
+xfs_scrub_refcount_xref_rmap(
+ struct xfs_scrub_context *sc,
+ struct xfs_owner_info *oinfo,
+ xfs_filblks_t cow_blocks)
+{
+ xfs_extlen_t refcbt_blocks = 0;
+ xfs_filblks_t blocks;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ /* Check that we saw as many refcbt blocks as the rmap knows about. */
+ error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
+ if (!xfs_scrub_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
+ return;
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != refcbt_blocks)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+
+ /* Check that we saw as many cow blocks as the rmap knows about. */
+ xfs_rmap_ag_owner(oinfo, XFS_RMAP_OWN_COW);
+ error = xfs_scrub_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, oinfo,
+ &blocks);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (blocks != cow_blocks)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
/* Scrub the refcount btree for some AG. */
int
xfs_scrub_refcountbt(
struct xfs_scrub_context *sc)
{
struct xfs_owner_info oinfo;
+ xfs_agblock_t cow_blocks = 0;
+ int error;
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_REFC);
- return xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
- &oinfo, NULL);
+ error = xfs_scrub_btree(sc, sc->sa.refc_cur, xfs_scrub_refcountbt_rec,
+ &oinfo, &cow_blocks);
+ if (error)
+ return error;
+
+ xfs_scrub_refcount_xref_rmap(sc, &oinfo, cow_blocks);
+
+ return 0;
+}
+
+/* xref check that a cow staging extent is marked in the refcountbt. */
+void
+xfs_scrub_xref_is_cow_staging(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ struct xfs_refcount_irec rc;
+ bool has_cowflag;
+ int has_refcount;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ /* Find the CoW staging extent. */
+ error = xfs_refcount_lookup_le(sc->sa.refc_cur,
+ agbno + XFS_REFC_COW_START, &has_refcount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (!has_refcount) {
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+ return;
+ }
+
+ error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (!has_refcount) {
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+ return;
+ }
+
+ /* CoW flag must be set, refcount must be 1. */
+ has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
+ if (!has_cowflag || rc.rc_refcount != 1)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+
+ /* Must be at least as long as what was passed in */
+ if (rc.rc_blockcount < len)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/*
+ * xref check that the extent is not shared. Only file data blocks
+ * can have multiple owners.
+ */
+void
+xfs_scrub_xref_is_not_shared(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno,
+ xfs_extlen_t len)
+{
+ bool shared;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (shared)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 97846c424690..8f2a7c3ff455 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -32,6 +32,7 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
+#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -51,6 +52,61 @@ xfs_scrub_setup_ag_rmapbt(
/* Reverse-mapping scrubber. */
+/* Cross-reference a rmap against the refcount btree. */
+STATIC void
+xfs_scrub_rmapbt_xref_refc(
+ struct xfs_scrub_context *sc,
+ struct xfs_rmap_irec *irec)
+{
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ bool non_inode;
+ bool is_bmbt;
+ bool is_attr;
+ bool is_unwritten;
+ int error;
+
+ if (!sc->sa.refc_cur)
+ return;
+
+ non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
+ is_bmbt = irec->rm_flags & XFS_RMAP_BMBT_BLOCK;
+ is_attr = irec->rm_flags & XFS_RMAP_ATTR_FORK;
+ is_unwritten = irec->rm_flags & XFS_RMAP_UNWRITTEN;
+
+ /* If this is shared, must be a data fork extent. */
+ error = xfs_refcount_find_shared(sc->sa.refc_cur, irec->rm_startblock,
+ irec->rm_blockcount, &fbno, &flen, false);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.refc_cur))
+ return;
+ if (flen != 0 && (non_inode || is_attr || is_bmbt || is_unwritten))
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
+}
+
+/* Cross-reference with the other btrees. */
+STATIC void
+xfs_scrub_rmapbt_xref(
+ struct xfs_scrub_context *sc,
+ struct xfs_rmap_irec *irec)
+{
+ xfs_agblock_t agbno = irec->rm_startblock;
+ xfs_extlen_t len = irec->rm_blockcount;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xfs_scrub_xref_is_used_space(sc, agbno, len);
+ if (irec->rm_owner == XFS_RMAP_OWN_INODES)
+ xfs_scrub_xref_is_inode_chunk(sc, agbno, len);
+ else
+ xfs_scrub_xref_is_not_inode_chunk(sc, agbno, len);
+ if (irec->rm_owner == XFS_RMAP_OWN_COW)
+ xfs_scrub_xref_is_cow_staging(sc, irec->rm_startblock,
+ irec->rm_blockcount);
+ else
+ xfs_scrub_rmapbt_xref_refc(sc, irec);
+}
+
/* Scrub an rmapbt record. */
STATIC int
xfs_scrub_rmapbt_rec(
@@ -121,6 +177,8 @@ xfs_scrub_rmapbt_rec(
irec.rm_owner > XFS_RMAP_OWN_FS)
xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
}
+
+ xfs_scrub_rmapbt_xref(bs->sc, &irec);
out:
return error;
}
@@ -136,3 +194,68 @@ xfs_scrub_rmapbt(
return xfs_scrub_btree(sc, sc->sa.rmap_cur, xfs_scrub_rmapbt_rec,
&oinfo, NULL);
}
+
+/* xref check that the extent is owned by a given owner */
+static inline void
+xfs_scrub_xref_check_owner(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo,
+ bool should_have_rmap)
+{
+ bool has_rmap;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
+ &has_rmap);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (has_rmap != should_have_rmap)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
+
+/* xref check that the extent is owned by a given owner */
+void
+xfs_scrub_xref_is_owned_by(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo)
+{
+ xfs_scrub_xref_check_owner(sc, bno, len, oinfo, true);
+}
+
+/* xref check that the extent is not owned by a given owner */
+void
+xfs_scrub_xref_is_not_owned_by(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ struct xfs_owner_info *oinfo)
+{
+ xfs_scrub_xref_check_owner(sc, bno, len, oinfo, false);
+}
+
+/* xref check that the extent has no reverse mapping at all */
+void
+xfs_scrub_xref_has_no_owner(
+ struct xfs_scrub_context *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len)
+{
+ bool has_rmap;
+ int error;
+
+ if (!sc->sa.rmap_cur)
+ return;
+
+ error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
+ if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
+ return;
+ if (has_rmap)
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
+}
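All of the xref helpers above (and the refcount ones earlier) share one skeleton: bail out if the cursor has been torn down, run the btree query, funnel any error through xfs_scrub_should_check_xref(), and set the XCORRUPT flag on a mismatch. A self-contained sketch of that shape, with stand-in names and a toy query in place of the kernel API:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in scrub state; field names are assumptions for illustration. */
struct scrub_ctx { bool have_cursor; unsigned flags; };
#define XCORRUPT 0x1
#define XFAIL    0x2

/* Stand-in query: pretend the rmap btree found a record. */
static int query(bool *found) { *found = true; return 0; }

/* On query failure, mark XFAIL and disable further cross-references. */
static bool should_check_xref(struct scrub_ctx *sc, int *error)
{
	if (*error == 0)
		return true;
	sc->flags |= XFAIL;
	sc->have_cursor = false;
	return false;
}

/* The shape shared by every helper above: cursor check, query,
 * error funnel, then XCORRUPT on an expectation mismatch. */
static void xref_check(struct scrub_ctx *sc, bool expected)
{
	bool found;
	int error;

	if (!sc->have_cursor)
		return;
	error = query(&found);
	if (!should_check_xref(sc, &error))
		return;
	if (found != expected)
		sc->flags |= XCORRUPT;
}

int main(void)
{
	struct scrub_ctx sc = { .have_cursor = true };

	xref_check(&sc, false);
	printf("flags 0x%x\n", sc.flags);	/* 0x1: XCORRUPT set */
	return 0;
}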
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index c6fedb698008..26390991369a 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -43,22 +43,14 @@ xfs_scrub_setup_rt(
struct xfs_scrub_context *sc,
struct xfs_inode *ip)
{
- struct xfs_mount *mp = sc->mp;
- int error = 0;
-
- /*
- * If userspace gave us an AG number or inode data, they don't
- * know what they're doing. Get out.
- */
- if (sc->sm->sm_agno || sc->sm->sm_ino || sc->sm->sm_gen)
- return -EINVAL;
+ int error;
error = xfs_scrub_setup_fs(sc, ip);
if (error)
return error;
sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
- sc->ip = mp->m_rbmip;
+ sc->ip = sc->mp->m_rbmip;
xfs_ilock(sc->ip, sc->ilock_flags);
return 0;
@@ -106,3 +98,26 @@ xfs_scrub_rtsummary(
/* XXX: implement this some day */
return -ENOENT;
}
+
+/* xref check that the extent is not free in the rtbitmap */
+void
+xfs_scrub_xref_is_used_rt_space(
+ struct xfs_scrub_context *sc,
+ xfs_rtblock_t fsbno,
+ xfs_extlen_t len)
+{
+ bool is_free;
+ int error;
+
+ xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len,
+ &is_free);
+ if (!xfs_scrub_should_check_xref(sc, &error, NULL))
+ goto out_unlock;
+ if (is_free)
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
+ NULL);
+out_unlock:
+ xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9c42c4efd01e..26c75967a072 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -46,7 +46,6 @@
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/scrub.h"
#include "scrub/btree.h"
/*
@@ -111,6 +110,16 @@
* structure itself is corrupt, the CORRUPT flag will be set. If
* the metadata is correct but otherwise suboptimal, the PREEN flag
* will be set.
+ *
+ * We perform secondary validation of filesystem metadata by
+ * cross-referencing every record with all other available metadata.
+ * For example, for block mapping extents, we verify that there are no
+ * records in the free space and inode btrees corresponding to that
+ * space extent and that there is a corresponding entry in the reverse
+ * mapping btree. Inconsistent metadata is noted by setting the
+ * XCORRUPT flag; btree query function errors are noted by setting the
+ * XFAIL flag and deleting the cursor to prevent further attempts to
+ * cross-reference with a defective btree.
*/
/*
@@ -129,8 +138,6 @@ xfs_scrub_probe(
{
int error = 0;
- if (sc->sm->sm_ino || sc->sm->sm_agno)
- return -EINVAL;
if (xfs_scrub_should_terminate(sc, &error))
return error;
@@ -152,7 +159,8 @@ xfs_scrub_teardown(
sc->tp = NULL;
}
if (sc->ip) {
- xfs_iunlock(sc->ip, sc->ilock_flags);
+ if (sc->ilock_flags)
+ xfs_iunlock(sc->ip, sc->ilock_flags);
if (sc->ip != ip_in &&
!xfs_internal_inum(sc->mp, sc->ip->i_ino))
iput(VFS_I(sc->ip));
@@ -168,106 +176,130 @@ xfs_scrub_teardown(
/* Scrubbing dispatch. */
static const struct xfs_scrub_meta_ops meta_scrub_ops[] = {
- { /* ioctl presence test */
+ [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */
+ .type = ST_NONE,
.setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_probe,
},
- { /* superblock */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_SB] = { /* superblock */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_superblock,
},
- { /* agf */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGF] = { /* agf */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agf,
},
- { /* agfl */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGFL] = { /* agfl */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agfl,
},
- { /* agi */
- .setup = xfs_scrub_setup_ag_header,
+ [XFS_SCRUB_TYPE_AGI] = { /* agi */
+ .type = ST_PERAG,
+ .setup = xfs_scrub_setup_fs,
.scrub = xfs_scrub_agi,
},
- { /* bnobt */
+ [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_allocbt,
.scrub = xfs_scrub_bnobt,
},
- { /* cntbt */
+ [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_allocbt,
.scrub = xfs_scrub_cntbt,
},
- { /* inobt */
+ [XFS_SCRUB_TYPE_INOBT] = { /* inobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_iallocbt,
.scrub = xfs_scrub_inobt,
},
- { /* finobt */
+ [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_iallocbt,
.scrub = xfs_scrub_finobt,
.has = xfs_sb_version_hasfinobt,
},
- { /* rmapbt */
+ [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_rmapbt,
.scrub = xfs_scrub_rmapbt,
.has = xfs_sb_version_hasrmapbt,
},
- { /* refcountbt */
+ [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
+ .type = ST_PERAG,
.setup = xfs_scrub_setup_ag_refcountbt,
.scrub = xfs_scrub_refcountbt,
.has = xfs_sb_version_hasreflink,
},
- { /* inode record */
+ [XFS_SCRUB_TYPE_INODE] = { /* inode record */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode,
.scrub = xfs_scrub_inode,
},
- { /* inode data fork */
+ [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_data,
},
- { /* inode attr fork */
+ [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_attr,
},
- { /* inode CoW fork */
+ [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_inode_bmap,
.scrub = xfs_scrub_bmap_cow,
},
- { /* directory */
+ [XFS_SCRUB_TYPE_DIR] = { /* directory */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_directory,
.scrub = xfs_scrub_directory,
},
- { /* extended attributes */
+ [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_xattr,
.scrub = xfs_scrub_xattr,
},
- { /* symbolic link */
+ [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_symlink,
.scrub = xfs_scrub_symlink,
},
- { /* parent pointers */
+ [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */
+ .type = ST_INODE,
.setup = xfs_scrub_setup_parent,
.scrub = xfs_scrub_parent,
},
- { /* realtime bitmap */
+ [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
+ .type = ST_FS,
.setup = xfs_scrub_setup_rt,
.scrub = xfs_scrub_rtbitmap,
.has = xfs_sb_version_hasrealtime,
},
- { /* realtime summary */
+ [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
+ .type = ST_FS,
.setup = xfs_scrub_setup_rt,
.scrub = xfs_scrub_rtsummary,
.has = xfs_sb_version_hasrealtime,
},
- { /* user quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
- { /* group quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
- { /* project quota */
- .setup = xfs_scrub_setup_quota,
- .scrub = xfs_scrub_quota,
+ [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */
+ .type = ST_FS,
+ .setup = xfs_scrub_setup_quota,
+ .scrub = xfs_scrub_quota,
},
};
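The conversion above swaps positional initializers for designated ones, so each entry is pinned to its XFS_SCRUB_TYPE_* index and any gap is zero-filled (and thus rejected by the ops->setup/ops->scrub NULL checks). A compilable miniature of the same pattern, with invented names:

#include <stdio.h>

enum op_type { OP_PROBE, OP_SB, OP_NR };

struct ops {
	const char *name;
	int (*run)(void);
};

static int run_probe(void) { return 0; }
static int run_sb(void)    { return 0; }

/* Unlisted indices are zero-filled, so a NULL .run pointer doubles
 * as "unknown type", just like a NULL ->scrub hook above. */
static const struct ops op_table[] = {
	[OP_PROBE] = { .name = "probe", .run = run_probe },
	[OP_SB]    = { .name = "sb",    .run = run_sb },
};

/* Equivalent of the BUILD_BUG_ON in xfs_scrub_metadata(). */
_Static_assert(sizeof(op_table) == sizeof(struct ops) * OP_NR,
	       "table must cover every op type");

int main(void)
{
	enum op_type t = OP_SB;

	if (t < OP_NR && op_table[t].run)
		printf("%s -> %d\n", op_table[t].name, op_table[t].run());
	return 0;
}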
@@ -285,44 +317,56 @@ xfs_scrub_experimental_warning(
"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
}
-/* Dispatch metadata scrubbing. */
-int
-xfs_scrub_metadata(
- struct xfs_inode *ip,
+static int
+xfs_scrub_validate_inputs(
+ struct xfs_mount *mp,
struct xfs_scrub_metadata *sm)
{
- struct xfs_scrub_context sc;
- struct xfs_mount *mp = ip->i_mount;
+ int error;
const struct xfs_scrub_meta_ops *ops;
- bool try_harder = false;
- int error = 0;
-
- trace_xfs_scrub_start(ip, sm, error);
-
- /* Forbidden if we are shut down or mounted norecovery. */
- error = -ESHUTDOWN;
- if (XFS_FORCED_SHUTDOWN(mp))
- goto out;
- error = -ENOTRECOVERABLE;
- if (mp->m_flags & XFS_MOUNT_NORECOVERY)
- goto out;
- /* Check our inputs. */
error = -EINVAL;
+ /* Check our inputs. */
sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
goto out;
+ /* sm_reserved[] must be zero */
if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
goto out;
- /* Do we know about this type of metadata? */
error = -ENOENT;
+ /* Do we know about this type of metadata? */
if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
goto out;
ops = &meta_scrub_ops[sm->sm_type];
- if (ops->scrub == NULL)
+ if (ops->setup == NULL || ops->scrub == NULL)
+ goto out;
+ /* Does this fs even support this type of metadata? */
+ if (ops->has && !ops->has(&mp->m_sb))
+ goto out;
+
+ error = -EINVAL;
+ /* restricting fields must be appropriate for type */
+ switch (ops->type) {
+ case ST_NONE:
+ case ST_FS:
+ if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
+ goto out;
+ break;
+ case ST_PERAG:
+ if (sm->sm_ino || sm->sm_gen ||
+ sm->sm_agno >= mp->m_sb.sb_agcount)
+ goto out;
+ break;
+ case ST_INODE:
+ if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
+ goto out;
+ break;
+ default:
goto out;
+ }
+ error = -EOPNOTSUPP;
/*
* We won't scrub any filesystem that doesn't have the ability
* to record unwritten extents. The option was made default in
@@ -332,20 +376,46 @@ xfs_scrub_metadata(
* We also don't support v1-v3 filesystems, which aren't
* mountable.
*/
- error = -EOPNOTSUPP;
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
goto out;
- /* Does this fs even support this type of metadata? */
- error = -ENOENT;
- if (ops->has && !ops->has(&mp->m_sb))
- goto out;
-
/* We don't know how to repair anything yet. */
- error = -EOPNOTSUPP;
if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
goto out;
+ error = 0;
+out:
+ return error;
+}
+
+/* Dispatch metadata scrubbing. */
+int
+xfs_scrub_metadata(
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm)
+{
+ struct xfs_scrub_context sc;
+ struct xfs_mount *mp = ip->i_mount;
+ bool try_harder = false;
+ int error = 0;
+
+ BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
+ (sizeof(struct xfs_scrub_meta_ops) * XFS_SCRUB_TYPE_NR));
+
+ trace_xfs_scrub_start(ip, sm, error);
+
+ /* Forbidden if we are shut down or mounted norecovery. */
+ error = -ESHUTDOWN;
+ if (XFS_FORCED_SHUTDOWN(mp))
+ goto out;
+ error = -ENOTRECOVERABLE;
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ goto out;
+
+ error = xfs_scrub_validate_inputs(mp, sm);
+ if (error)
+ goto out;
+
xfs_scrub_experimental_warning(mp);
retry_op:
@@ -353,7 +423,7 @@ retry_op:
memset(&sc, 0, sizeof(sc));
sc.mp = ip->i_mount;
sc.sm = sm;
- sc.ops = ops;
+ sc.ops = &meta_scrub_ops[sm->sm_type];
sc.try_harder = try_harder;
sc.sa.agno = NULLAGNUMBER;
error = sc.ops->setup(&sc, ip);
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e9ec041cf713..0d92af86f67a 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -22,6 +22,14 @@
struct xfs_scrub_context;
+/* Type info and names for the scrub types. */
+enum xfs_scrub_type {
+ ST_NONE = 1, /* disabled */
+ ST_PERAG, /* per-AG metadata */
+ ST_FS, /* per-FS metadata */
+ ST_INODE, /* per-inode metadata */
+};
+
struct xfs_scrub_meta_ops {
/* Acquire whatever resources are needed for the operation. */
int (*setup)(struct xfs_scrub_context *,
@@ -32,6 +40,9 @@ struct xfs_scrub_meta_ops {
/* Decide if we even have this piece of metadata. */
bool (*has)(struct xfs_sb *);
+
+ /* type describing required/allowed inputs */
+ enum xfs_scrub_type type;
};
/* Buffer pointers and btree cursors for an entire AG. */
@@ -112,4 +123,30 @@ xfs_scrub_quota(struct xfs_scrub_context *sc)
}
#endif
+/* cross-referencing helpers */
+void xfs_scrub_xref_is_used_space(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_inode_chunk(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_inode_chunk(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_owned_by(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_is_not_owned_by(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len,
+ struct xfs_owner_info *oinfo);
+void xfs_scrub_xref_has_no_owner(struct xfs_scrub_context *sc,
+ xfs_agblock_t agbno, xfs_extlen_t len);
+void xfs_scrub_xref_is_cow_staging(struct xfs_scrub_context *sc,
+ xfs_agblock_t bno, xfs_extlen_t len);
+void xfs_scrub_xref_is_not_shared(struct xfs_scrub_context *sc,
+ xfs_agblock_t bno, xfs_extlen_t len);
+#ifdef CONFIG_XFS_RT
+void xfs_scrub_xref_is_used_rt_space(struct xfs_scrub_context *sc,
+ xfs_rtblock_t rtbno, xfs_extlen_t len);
+#else
+# define xfs_scrub_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+#endif
+
#endif /* __XFS_SCRUB_SCRUB_H__ */
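The CONFIG_XFS_RT stub above is the usual compile-out idiom: when the feature is configured off, the helper becomes an empty do { } while (0) statement, so call sites need no #ifdefs of their own. A tiny standalone example of the idiom (names invented):

#include <stdio.h>

#define CONFIG_FEATURE 0	/* flip to 1 to compile the real helper */

#if CONFIG_FEATURE
void feature_check(int x) { printf("checking %d\n", x); }
#else
/* Expands to an empty statement: callers compile unchanged. */
# define feature_check(x) do { } while (0)
#endif

int main(void)
{
	feature_check(42);	/* valid either way */
	puts("done");
	return 0;
}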
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 472080e75788..86daed0e3a45 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -26,7 +26,6 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index c4ebfb5c1ee8..4dc896852bf0 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -50,7 +50,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
__entry->flags = sm->sm_flags;
__entry->error = error;
),
- TP_printk("dev %d:%d ino %llu type %u agno %u inum %llu gen %u flags 0x%x error %d",
+ TP_printk("dev %d:%d ino 0x%llx type %u agno %u inum %llu gen %u flags 0x%x error %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
@@ -90,7 +90,7 @@ TRACE_EVENT(xfs_scrub_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %u agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->agno,
@@ -121,7 +121,7 @@ TRACE_EVENT(xfs_scrub_file_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu error %d ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -156,7 +156,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_block_error_class,
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->agno,
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu type %u agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
@@ -246,7 +246,7 @@ DECLARE_EVENT_CLASS(xfs_scrub_fblock_error_class,
__entry->offset = offset;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u offset %llu ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u offset %llu ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -277,7 +277,7 @@ TRACE_EVENT(xfs_scrub_incomplete,
__entry->type = sc->sm->sm_type;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->ret_ip)
@@ -311,7 +311,7 @@ TRACE_EVENT(xfs_scrub_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->btnum,
@@ -354,7 +354,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_op_error,
__entry->error = error;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u error %d ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -393,7 +393,7 @@ TRACE_EVENT(xfs_scrub_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->btnum,
@@ -433,7 +433,7 @@ TRACE_EVENT(xfs_scrub_ifork_btree_error,
__entry->ptr = cur->bc_ptrs[level];
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino %llu fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pF",
+ TP_printk("dev %d:%d ino 0x%llx fork %d type %u btnum %d level %d ptr %d agno %u agbno %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->whichfork,
@@ -491,6 +491,28 @@ DEFINE_EVENT(xfs_scrub_sbtree_class, name, \
DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_rec);
DEFINE_SCRUB_SBTREE_EVENT(xfs_scrub_btree_key);
+TRACE_EVENT(xfs_scrub_xref_error,
+ TP_PROTO(struct xfs_scrub_context *sc, int error, void *ret_ip),
+ TP_ARGS(sc, error, ret_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(int, error)
+ __field(void *, ret_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->type = sc->sm->sm_type;
+ __entry->error = error;
+ __entry->ret_ip = ret_ip;
+ ),
+ TP_printk("dev %d:%d type %u xref error %d ret_ip %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->error,
+ __entry->ret_ip)
+);
+
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a3eeaba156c5..9c6a830da0ee 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -390,6 +390,19 @@ xfs_map_blocks(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
+ /*
+ * Truncate can race with writeback since writeback doesn't take the
+ * iolock and truncate decreases the file size before it starts
+ * truncating the pages between new_size and old_size. Therefore, we
+ * can end up in the situation where writeback gets a CoW fork mapping
+ * but the truncate makes the mapping invalid and we end up in here
+ * trying to get a new mapping. Bail out here so that we simply never
+ * get a valid mapping and so we drop the write altogether. The page
+ * truncation will kill the contents anyway.
+ */
+ if (type == XFS_IO_COW && offset > i_size_read(inode))
+ return 0;
+
ASSERT(type != XFS_IO_COW);
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
@@ -399,7 +412,7 @@ xfs_map_blocks(
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - count)
count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
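The s_maxbytes checks above are rewritten from "offset + count > max" to "offset > max - count" so the comparison never computes a sum that can overflow; given the preceding ASSERT that offset <= s_maxbytes, the subtraction is always safe. A standalone illustration with signed 64-bit values and invented names:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Clamp an I/O of @count bytes at @offset to @maxbytes without ever
 * computing offset + count, which can overflow for huge offsets.
 * Caller guarantees 0 <= offset <= maxbytes and count <= maxbytes. */
static int64_t clamp_count(int64_t offset, int64_t count, int64_t maxbytes)
{
	if (offset > maxbytes - count)	/* same test, no overflow */
		count = maxbytes - offset;
	return count;
}

int main(void)
{
	int64_t maxbytes = INT64_MAX - 4095;	/* an s_maxbytes-like limit */
	int64_t offset = maxbytes - 100;

	/* the naive offset + count here would overflow int64_t */
	printf("clamped to %" PRId64 " bytes\n",
	       clamp_count(offset, 4096, maxbytes));	/* -> 100 */
	return 0;
}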
@@ -791,7 +804,7 @@ xfs_aops_discard_page(
goto out_invalidate;
xfs_alert(ip->i_mount,
- "page discard on page %p, inode 0x%llx, offset %llu.",
+ "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
page, ip->i_ino, offset);
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -896,13 +909,13 @@ xfs_writepage_map(
struct writeback_control *wbc,
struct inode *inode,
struct page *page,
- loff_t offset,
- uint64_t end_offset)
+ uint64_t end_offset)
{
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
ssize_t len = i_blocksize(inode);
+ uint64_t offset;
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1146,7 +1159,7 @@ xfs_do_writepage(
end_offset = offset;
}
- return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
+ return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -1265,7 +1278,7 @@ xfs_map_trim_size(
if (mapping_size > size)
mapping_size = size;
if (offset < i_size_read(inode) &&
- offset + mapping_size >= i_size_read(inode)) {
+ (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
i_blocksize(inode));
@@ -1312,7 +1325,7 @@ xfs_get_blocks(
lockmode = xfs_ilock_data_map_shared(ip);
ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + size > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - size)
size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index dd136f7275e4..e5fb008d75e8 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -389,7 +389,8 @@ xfs_bud_init(
int
xfs_bui_recover(
struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+ struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops)
{
int error = 0;
unsigned int bui_type;
@@ -404,9 +405,7 @@ xfs_bui_recover(
xfs_exntst_t state;
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
- struct xfs_defer_ops dfops;
struct xfs_bmbt_irec irec;
- xfs_fsblock_t firstfsb;
ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -464,7 +463,6 @@ xfs_bui_recover(
if (VFS_I(ip)->i_nlink == 0)
xfs_iflags_set(ip, XFS_IRECOVERY);
- xfs_defer_init(&dfops, &firstfsb);
/* Process deferred bmap item. */
state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
@@ -479,16 +477,16 @@ xfs_bui_recover(
break;
default:
error = -EFSCORRUPTED;
- goto err_dfops;
+ goto err_inode;
}
xfs_trans_ijoin(tp, ip, 0);
count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
+ error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type,
ip, whichfork, bmap->me_startoff,
bmap->me_startblock, &count, state);
if (error)
- goto err_dfops;
+ goto err_inode;
if (count > 0) {
ASSERT(type == XFS_BMAP_UNMAP);
@@ -496,16 +494,11 @@ xfs_bui_recover(
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
irec.br_state = state;
- error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+ error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec);
if (error)
- goto err_dfops;
+ goto err_inode;
}
- /* Finish transaction, free inodes. */
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto err_dfops;
-
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -513,8 +506,6 @@ xfs_bui_recover(
return error;
-err_dfops:
- xfs_defer_cancel(&dfops);
err_inode:
xfs_trans_cancel(tp);
if (ip) {
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index c867daae4a3c..24b354a2c836 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
-int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip);
+int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_BMAP_ITEM_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6d37ab43195f..c83f549dc17b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1872,7 +1872,7 @@ xfs_swap_extents(
*/
lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
lock_flags = XFS_MMAPLOCK_EXCL;
- xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
+ xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
@@ -1919,7 +1919,7 @@ xfs_swap_extents(
* Lock and join the inodes to the transaction so that transaction commit
* or cancel will unlock the inodes from this point onwards.
*/
- xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
lock_flags |= XFS_ILOCK_EXCL;
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_ijoin(tp, tip, 0);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4db6e8d780f6..d1da2ee9e6db 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -236,6 +236,7 @@ _xfs_buf_alloc(
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
+ INIT_LIST_HEAD(&bp->b_li_list);
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
XB_SET_OWNER(bp);
@@ -585,7 +586,7 @@ _xfs_buf_find(
* returning a specific error on buffer lookup failures.
*/
xfs_alert(btp->bt_mount,
- "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
+ "%s: daddr 0x%llx out of range, EOFS 0x%llx",
__func__, cmap.bm_bn, eofs);
WARN_ON(1);
return NULL;
@@ -1180,13 +1181,14 @@ xfs_buf_ioend_async(
}
void
-xfs_buf_ioerror(
+__xfs_buf_ioerror(
xfs_buf_t *bp,
- int error)
+ int error,
+ xfs_failaddr_t failaddr)
{
ASSERT(error <= 0 && error >= -1000);
bp->b_error = error;
- trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+ trace_xfs_buf_ioerror(bp, error, failaddr);
}
void
@@ -1195,8 +1197,9 @@ xfs_buf_ioerror_alert(
const char *func)
{
xfs_alert(bp->b_target->bt_mount,
-"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
- (uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+ func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
+ -bp->b_error);
}
int
@@ -1378,9 +1381,10 @@ _xfs_buf_ioapply(
*/
if (xfs_sb_version_hascrc(&mp->m_sb)) {
xfs_warn(mp,
- "%s: no ops on block 0x%llx/0x%x",
+ "%s: no buf ops on daddr 0x%llx len %d",
__func__, bp->b_bn, bp->b_length);
- xfs_hex_dump(bp->b_addr, 64);
+ xfs_hex_dump(bp->b_addr,
+ XFS_CORRUPTION_DUMP_LEN);
dump_stack();
}
}
@@ -1671,7 +1675,7 @@ xfs_wait_buftarg(
list_del_init(&bp->b_lru);
if (bp->b_flags & XBF_WRITE_FAIL) {
xfs_alert(btp->bt_mount,
-"Corruption Alert: Buffer at block 0x%llx had permanent write failures!",
+"Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
(long long)bp->b_bn);
xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
@@ -1815,22 +1819,27 @@ xfs_alloc_buftarg(
btp->bt_daxdev = dax_dev;
if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
+ goto error_free;
if (list_lru_init(&btp->bt_lru))
- goto error;
+ goto error_free;
if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
- goto error;
+ goto error_lru;
btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
btp->bt_shrinker.seeks = DEFAULT_SEEKS;
btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&btp->bt_shrinker);
+ if (register_shrinker(&btp->bt_shrinker))
+ goto error_pcpu;
return btp;
-error:
+error_pcpu:
+ percpu_counter_destroy(&btp->bt_io_count);
+error_lru:
+ list_lru_destroy(&btp->bt_lru);
+error_free:
kmem_free(btp);
return NULL;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index f873bb786824..2f4c91452861 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -140,6 +140,7 @@ struct xfs_buf_ops {
char *name;
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
+ xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
};
typedef struct xfs_buf {
@@ -175,7 +176,8 @@ typedef struct xfs_buf {
struct workqueue_struct *b_ioend_wq; /* I/O completion wq */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
struct completion b_iowait; /* queue for I/O waiters */
- void *b_fspriv;
+ void *b_log_item;
+ struct list_head b_li_list; /* Log items list head */
struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
@@ -315,7 +317,9 @@ extern void xfs_buf_unlock(xfs_buf_t *);
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp);
extern void xfs_buf_ioend(struct xfs_buf *bp);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
+ xfs_failaddr_t failaddr);
+#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
extern void xfs_buf_submit(struct xfs_buf *bp);
extern int xfs_buf_submit_wait(struct xfs_buf *bp);
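xfs_buf_ioerror() is now a macro, so every call site stamps its own address (__this_address) into the error report, while the new __xfs_buf_ioerror() takes an explicit failure address for callers that already captured one. A userspace analogue of the same plumbing, using __FILE__/__LINE__ as a stand-in for the kernel's code-address capture:

#include <stdio.h>

static void __report_error(int error, const char *file, int line)
{
	/* the kernel records a code address; file:line is our stand-in */
	fprintf(stderr, "error %d injected at %s:%d\n", error, file, line);
}

/* Call sites keep the short name; the location comes along for free. */
#define report_error(err) __report_error((err), __FILE__, __LINE__)

int main(void)
{
	report_error(-117);	/* roughly -EFSCORRUPTED in kernel terms */
	return 0;
}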
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e0a0af0946f2..270ddb4d2313 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -61,14 +61,14 @@ xfs_buf_log_format_size(
*/
STATIC void
xfs_buf_item_size_segment(
- struct xfs_buf_log_item *bip,
- struct xfs_buf_log_format *blfp,
- int *nvecs,
- int *nbytes)
+ struct xfs_buf_log_item *bip,
+ struct xfs_buf_log_format *blfp,
+ int *nvecs,
+ int *nbytes)
{
- struct xfs_buf *bp = bip->bli_buf;
- int next_bit;
- int last_bit;
+ struct xfs_buf *bp = bip->bli_buf;
+ int next_bit;
+ int last_bit;
last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (last_bit == -1)
@@ -218,12 +218,12 @@ xfs_buf_item_format_segment(
uint offset,
struct xfs_buf_log_format *blfp)
{
- struct xfs_buf *bp = bip->bli_buf;
- uint base_size;
- int first_bit;
- int last_bit;
- int next_bit;
- uint nbits;
+ struct xfs_buf *bp = bip->bli_buf;
+ uint base_size;
+ int first_bit;
+ int last_bit;
+ int next_bit;
+ uint nbits;
/* copy the flags across from the base format item */
blfp->blf_flags = bip->__bli_format.blf_flags;
@@ -406,12 +406,12 @@ xfs_buf_item_unpin(
int remove)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- xfs_buf_t *bp = bip->bli_buf;
- struct xfs_ail *ailp = lip->li_ailp;
- int stale = bip->bli_flags & XFS_BLI_STALE;
- int freed;
+ xfs_buf_t *bp = bip->bli_buf;
+ struct xfs_ail *ailp = lip->li_ailp;
+ int stale = bip->bli_flags & XFS_BLI_STALE;
+ int freed;
- ASSERT(bp->b_fspriv == bip);
+ ASSERT(bp->b_log_item == bip);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_buf_item_unpin(bip);
@@ -456,13 +456,14 @@ xfs_buf_item_unpin(
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp);
- bp->b_fspriv = NULL;
+ bp->b_log_item = NULL;
+ list_del_init(&bp->b_li_list);
bp->b_iodone = NULL;
} else {
spin_lock(&ailp->xa_lock);
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
- ASSERT(bp->b_fspriv == NULL);
+ ASSERT(bp->b_log_item == NULL);
}
xfs_buf_relse(bp);
} else if (freed && remove) {
@@ -722,18 +723,15 @@ xfs_buf_item_free_format(
/*
* Allocate a new buf log item to go with the given buffer.
- * Set the buffer's b_fsprivate field to point to the new
- * buf log item. If there are other item's attached to the
- * buffer (see xfs_buf_attach_iodone() below), then put the
- * buf log item at the front.
+ * Set the buffer's b_log_item field to point to the new
+ * buf log item.
*/
int
xfs_buf_item_init(
struct xfs_buf *bp,
struct xfs_mount *mp)
{
- struct xfs_log_item *lip = bp->b_fspriv;
- struct xfs_buf_log_item *bip;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
int chunks;
int map_size;
int error;
@@ -741,13 +739,14 @@ xfs_buf_item_init(
/*
* Check to see if there is already a buf log item for
- * this buffer. If there is, it is guaranteed to be
- * the first. If we do already have one, there is
+ * this buffer. If we do already have one, there is
* nothing to do here so return.
*/
ASSERT(bp->b_target->bt_mount == mp);
- if (lip != NULL && lip->li_type == XFS_LI_BUF)
+ if (bip != NULL) {
+ ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
return 0;
+ }
bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
@@ -781,13 +780,7 @@ xfs_buf_item_init(
bip->bli_formats[i].blf_map_size = map_size;
}
- /*
- * Put the buf item into the list of items attached to the
- * buffer at the front.
- */
- if (bp->b_fspriv)
- bip->bli_item.li_bio_list = bp->b_fspriv;
- bp->b_fspriv = bip;
+ bp->b_log_item = bip;
xfs_buf_hold(bp);
return 0;
}
@@ -880,7 +873,7 @@ xfs_buf_item_log_segment(
*/
void
xfs_buf_item_log(
- xfs_buf_log_item_t *bip,
+ struct xfs_buf_log_item *bip,
uint first,
uint last)
{
@@ -943,7 +936,7 @@ xfs_buf_item_dirty_format(
STATIC void
xfs_buf_item_free(
- xfs_buf_log_item_t *bip)
+ struct xfs_buf_log_item *bip)
{
xfs_buf_item_free_format(bip);
kmem_free(bip->bli_item.li_lv_shadow);
@@ -961,13 +954,13 @@ void
xfs_buf_item_relse(
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
- bp->b_fspriv = bip->bli_item.li_bio_list;
- if (bp->b_fspriv == NULL)
+ bp->b_log_item = NULL;
+ if (list_empty(&bp->b_li_list))
bp->b_iodone = NULL;
xfs_buf_rele(bp);
@@ -980,9 +973,7 @@ xfs_buf_item_relse(
* to be called when the buffer's I/O completes. If it is not set
* already, set the buffer's b_iodone() routine to be
* xfs_buf_iodone_callbacks() and link the log item into the list of
- * items rooted at b_fsprivate. Items are always added as the second
- * entry in the list if there is a first, because the buf item code
- * assumes that the buf log item is first.
+ * items rooted at b_li_list.
*/
void
xfs_buf_attach_iodone(
@@ -990,18 +981,10 @@ xfs_buf_attach_iodone(
void (*cb)(xfs_buf_t *, xfs_log_item_t *),
xfs_log_item_t *lip)
{
- xfs_log_item_t *head_lip;
-
ASSERT(xfs_buf_islocked(bp));
lip->li_cb = cb;
- head_lip = bp->b_fspriv;
- if (head_lip) {
- lip->li_bio_list = head_lip->li_bio_list;
- head_lip->li_bio_list = lip;
- } else {
- bp->b_fspriv = lip;
- }
+ list_add_tail(&lip->li_bio_list, &bp->b_li_list);
ASSERT(bp->b_iodone == NULL ||
bp->b_iodone == xfs_buf_iodone_callbacks);
@@ -1011,12 +994,12 @@ xfs_buf_attach_iodone(
/*
* We can have many callbacks on a buffer. Running the callbacks individually
* can cause a lot of contention on the AIL lock, so we allow for a single
- * callback to be able to scan the remaining lip->li_bio_list for other items
- * of the same type and callback to be processed in the first call.
+ * callback to be able to scan the remaining items in bp->b_li_list for other
+ * items of the same type and callback, to be processed in the first call.
*
* As a result, the loop walking the callback list below will also modify the
* list. it removes the first item from the list and then runs the callback.
- * The loop then restarts from the new head of the list. This allows the
+ * The loop then restarts from the new first item in the list. This allows the
* callback to scan and modify the list attached to the buffer and we don't
* have to care about maintaining a next item pointer.
*/
@@ -1024,18 +1007,26 @@ STATIC void
xfs_buf_do_callbacks(
struct xfs_buf *bp)
{
+ struct xfs_buf_log_item *blip = bp->b_log_item;
struct xfs_log_item *lip;
- while ((lip = bp->b_fspriv) != NULL) {
- bp->b_fspriv = lip->li_bio_list;
- ASSERT(lip->li_cb != NULL);
+ /* If there is a buf_log_item attached, run its callback */
+ if (blip) {
+ lip = &blip->bli_item;
+ lip->li_cb(bp, lip);
+ }
+
+ while (!list_empty(&bp->b_li_list)) {
+ lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
+ li_bio_list);
+
/*
- * Clear the next pointer so we don't have any
+ * Remove the item from the list, so we don't have any
* confusion if the item is added to another buf.
* Don't touch the log item after calling its
* callback, because it could have freed itself.
*/
- lip->li_bio_list = NULL;
+ list_del_init(&lip->li_bio_list);
lip->li_cb(bp, lip);
}
}
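The rewritten xfs_buf_do_callbacks() drains b_li_list by detaching the current first item before invoking its callback, which keeps the walk safe even though a callback may modify the list or free its own item. A self-contained miniature of that drain pattern:

#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	void (*cb)(struct item *);
	int id;
};

static void print_cb(struct item *i)
{
	printf("callback for item %d\n", i->id);
	free(i);		/* safe: the item is already off the list */
}

int main(void)
{
	struct item *head = NULL;

	for (int id = 3; id >= 1; id--) {
		struct item *i = malloc(sizeof(*i));

		i->id = id; i->cb = print_cb; i->next = head;
		head = i;
	}
	while (head) {		/* restart from the new first item */
		struct item *i = head;

		head = i->next;
		i->next = NULL;	/* unlink before the callback runs */
		i->cb(i);
	}
	return 0;
}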
@@ -1052,13 +1043,22 @@ STATIC void
xfs_buf_do_callbacks_fail(
struct xfs_buf *bp)
{
- struct xfs_log_item *next;
- struct xfs_log_item *lip = bp->b_fspriv;
- struct xfs_ail *ailp = lip->li_ailp;
+ struct xfs_log_item *lip;
+ struct xfs_ail *ailp;
+ /*
+ * Buffer log item errors are handled directly by xfs_buf_item_push()
+ * and xfs_buf_iodone_callback_error, and they have no IO error
+ * callbacks. Check only for items in b_li_list.
+ */
+ if (list_empty(&bp->b_li_list))
+ return;
+
+ lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
+ li_bio_list);
+ ailp = lip->li_ailp;
spin_lock(&ailp->xa_lock);
- for (; lip; lip = next) {
- next = lip->li_bio_list;
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_ops->iop_error)
lip->li_ops->iop_error(lip, bp);
}
@@ -1069,13 +1069,23 @@ static bool
xfs_buf_iodone_callback_error(
struct xfs_buf *bp)
{
- struct xfs_log_item *lip = bp->b_fspriv;
- struct xfs_mount *mp = lip->li_mountp;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ struct xfs_log_item *lip;
+ struct xfs_mount *mp;
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
struct xfs_error_cfg *cfg;
/*
+ * The failed buffer might not have a buf_log_item attached or the
+ * log_item list might be empty. Get the mp from the available
+ * xfs_log_item
+ */
+ lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
+ li_bio_list);
+ mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
+
+ /*
* If we've already decided to shutdown the filesystem because of
* I/O errors, there's no point in giving this a retry.
*/
@@ -1183,7 +1193,8 @@ xfs_buf_iodone_callbacks(
bp->b_first_retry_time = 0;
xfs_buf_do_callbacks(bp);
- bp->b_fspriv = NULL;
+ bp->b_log_item = NULL;
+ list_del_init(&bp->b_li_list);
bp->b_iodone = NULL;
xfs_buf_ioend(bp);
}
@@ -1228,10 +1239,9 @@ xfs_buf_iodone(
bool
xfs_buf_resubmit_failed_buffers(
struct xfs_buf *bp,
- struct xfs_log_item *lip,
struct list_head *buffer_list)
{
- struct xfs_log_item *next;
+ struct xfs_log_item *lip;
/*
* Clear XFS_LI_FAILED flag from all items before resubmit
@@ -1239,10 +1249,8 @@ xfs_buf_resubmit_failed_buffers(
* XFS_LI_FAILED set/clear is protected by xa_lock; the caller of
* this function already has it acquired
*/
- for (; lip; lip = next) {
- next = lip->li_bio_list;
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
xfs_clear_li_failed(lip);
- }
/* Add this buffer back to the delayed write list */
return xfs_buf_delwri_queue(bp, buffer_list);
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 9690ce62c9a7..643f53dcfe51 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -50,7 +50,7 @@ struct xfs_buf_log_item;
* needed to log buffers. It tracks how many times the lock has been
* locked, and which 128 byte chunks of the buffer are dirty.
*/
-typedef struct xfs_buf_log_item {
+struct xfs_buf_log_item {
xfs_log_item_t bli_item; /* common item structure */
struct xfs_buf *bli_buf; /* real buffer pointer */
unsigned int bli_flags; /* misc flags */
@@ -59,11 +59,11 @@ typedef struct xfs_buf_log_item {
int bli_format_count; /* count of headers */
struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
struct xfs_buf_log_format __bli_format; /* embedded in-log header */
-} xfs_buf_log_item_t;
+};
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_relse(struct xfs_buf *);
-void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
+void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_attach_iodone(struct xfs_buf *,
void(*)(struct xfs_buf *, xfs_log_item_t *),
@@ -71,7 +71,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
- struct xfs_log_item *,
struct list_head *);
extern kmem_zone_t *xfs_buf_item_zone;
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 0c58918bc0ad..b6ae3597bfb0 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -152,7 +152,6 @@ xfs_dir2_block_getdents(
struct xfs_inode *dp = args->dp; /* incore directory inode */
xfs_dir2_data_hdr_t *hdr; /* block header */
struct xfs_buf *bp; /* buffer for block */
- xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_dir2_data_unused_t *dup; /* block unused entry */
char *endptr; /* end of the data entries */
@@ -185,9 +184,8 @@ xfs_dir2_block_getdents(
/*
* Set up values for the loop.
*/
- btp = xfs_dir2_block_tail_p(geo, hdr);
ptr = (char *)dp->d_ops->data_entry_p(hdr);
- endptr = (char *)xfs_dir2_block_leaf_p(btp);
+ endptr = xfs_dir3_data_endp(geo, hdr);
/*
* Loop over the data portion of the block.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index d57c2db64e59..43572f8a1b8e 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -399,52 +399,6 @@ error0:
return error;
}
-STATIC int
-xfs_qm_dqrepair(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_dquot *dqp,
- xfs_dqid_t firstid,
- struct xfs_buf **bpp)
-{
- int error;
- struct xfs_disk_dquot *ddq;
- struct xfs_dqblk *d;
- int i;
-
- /*
- * Read the buffer without verification so we get the corrupted
- * buffer returned to us. make sure we verify it on write, though.
- */
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen,
- 0, bpp, NULL);
-
- if (error) {
- ASSERT(*bpp == NULL);
- return error;
- }
- (*bpp)->b_ops = &xfs_dquot_buf_ops;
-
- ASSERT(xfs_buf_islocked(*bpp));
- d = (struct xfs_dqblk *)(*bpp)->b_addr;
-
- /* Do the actual repair of dquots in this buffer */
- for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
- ddq = &d[i].dd_diskdq;
- error = xfs_dqcheck(mp, ddq, firstid + i,
- dqp->dq_flags & XFS_DQ_ALLTYPES,
- XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
- if (error) {
- /* repair failed, we're screwed */
- xfs_trans_brelse(tp, *bpp);
- return -EIO;
- }
- }
-
- return 0;
-}
-
/*
* Maps a dquot to the buffer containing its on-disk version.
* This returns a ptr to the buffer containing the on-disk dquot
@@ -526,14 +480,6 @@ xfs_qm_dqtobp(
dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen,
0, &bp, &xfs_dquot_buf_ops);
-
- if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
- xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
- mp->m_quotainfo->qi_dqperchunk;
- ASSERT(bp == NULL);
- error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp);
- }
-
if (error) {
ASSERT(bp == NULL);
return error;
@@ -970,14 +916,22 @@ xfs_qm_dqflush_done(
* holding the lock before removing the dquot from the AIL.
*/
if ((lip->li_flags & XFS_LI_IN_AIL) &&
- lip->li_lsn == qip->qli_flush_lsn) {
+ ((lip->li_lsn == qip->qli_flush_lsn) ||
+ (lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == qip->qli_flush_lsn)
+ if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
- else
+ } else {
+ /*
+ * Clear the failed state since we are about to drop the
+ * flush lock
+ */
+ if (lip->li_flags & XFS_LI_FAILED)
+ xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
+ }
}
/*
@@ -1002,6 +956,7 @@ xfs_qm_dqflush(
struct xfs_mount *mp = dqp->q_mount;
struct xfs_buf *bp;
struct xfs_disk_dquot *ddqp;
+ xfs_failaddr_t fa;
int error;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -1048,9 +1003,10 @@ xfs_qm_dqflush(
/*
* A simple sanity check in case we got a corrupted dquot..
*/
- error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
- XFS_QMOPT_DOWARN, "dqflush (incore copy)");
- if (error) {
+ fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0);
+ if (fa) {
+ xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
+ be32_to_cpu(ddqp->d_id), fa);
xfs_buf_relse(bp);
xfs_dqfunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 2c7a1629e064..96eaa6933709 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -137,6 +137,23 @@ xfs_qm_dqunpin_wait(
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
}
+/*
+ * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
+ * fail during writeback.
+ *
+ * This informs the AIL that the dquot is already flush locked on the next push,
+ * and acquires a hold on the buffer to ensure that it isn't reclaimed before
+ * dirty data makes it to disk.
+ */
+STATIC void
+xfs_dquot_item_error(
+ struct xfs_log_item *lip,
+ struct xfs_buf *bp)
+{
+ ASSERT(!completion_done(&DQUOT_ITEM(lip)->qli_dquot->q_flush));
+ xfs_set_li_failed(lip, bp);
+}
+
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
@@ -144,13 +161,28 @@ xfs_qm_dquot_logitem_push(
__acquires(&lip->li_ailp->xa_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- struct xfs_buf *bp = NULL;
+ struct xfs_buf *bp = lip->li_buf;
uint rval = XFS_ITEM_SUCCESS;
int error;
if (atomic_read(&dqp->q_pincount) > 0)
return XFS_ITEM_PINNED;
+ /*
+ * The buffer containing this item failed to be written back
+ * previously. Resubmit the buffer for IO.
+ */
+ if (lip->li_flags & XFS_LI_FAILED) {
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+
+ xfs_buf_unlock(bp);
+ return rval;
+ }
+
if (!xfs_dqlock_nowait(dqp))
return XFS_ITEM_LOCKED;
@@ -177,7 +209,7 @@ xfs_qm_dquot_logitem_push(
error = xfs_qm_dqflush(dqp, &bp);
if (error) {
- xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+ xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT,
__func__, error, dqp);
} else {
if (!xfs_buf_delwri_queue(bp, buffer_list))
@@ -242,7 +274,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_unlock = xfs_qm_dquot_logitem_unlock,
.iop_committed = xfs_qm_dquot_logitem_committed,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing
+ .iop_committing = xfs_qm_dquot_logitem_committing,
+ .iop_error = xfs_dquot_item_error
};
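With .iop_error wired up, a dquot whose buffer failed writeback is marked XFS_LI_FAILED; the next AIL push then skips re-flushing and simply resubmits the held buffer, mirroring the inode item's error handling. A rough standalone sketch of that control flow (all names invented):

#include <stdbool.h>
#include <stdio.h>

enum push_rval { ITEM_SUCCESS, ITEM_LOCKED, ITEM_FLUSHING };

struct log_item { bool failed; bool buf_locked; };

static bool trylock(struct log_item *li) { return !li->buf_locked; }

static bool resubmit(struct log_item *li)
{
	li->failed = false;	/* clear the XFS_LI_FAILED analogue */
	return true;		/* queued for delayed write */
}

static enum push_rval push(struct log_item *li)
{
	if (li->failed) {
		if (!trylock(li))
			return ITEM_LOCKED;
		return resubmit(li) ? ITEM_SUCCESS : ITEM_FLUSHING;
	}
	/* normal path: flush the dquot and queue the buffer (omitted) */
	return ITEM_SUCCESS;
}

int main(void)
{
	struct log_item li = { .failed = true };

	printf("push -> %d, failed now %d\n", push(&li), li.failed);
	return 0;
}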
/*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 4c9f35d983b2..ccf520f0b00d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -24,6 +24,7 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_sysfs.h"
+#include "xfs_inode.h"
#ifdef DEBUG
@@ -314,12 +315,12 @@ xfs_error_report(
struct xfs_mount *mp,
const char *filename,
int linenum,
- void *ra)
+ xfs_failaddr_t failaddr)
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
"Internal error %s at line %d of file %s. Caller %pS",
- tag, linenum, filename, ra);
+ tag, linenum, filename, failaddr);
xfs_stack_trace();
}
@@ -333,11 +334,11 @@ xfs_corruption_error(
void *p,
const char *filename,
int linenum,
- void *ra)
+ xfs_failaddr_t failaddr)
{
if (level <= xfs_error_level)
- xfs_hex_dump(p, 64);
- xfs_error_report(tag, level, mp, filename, linenum, ra);
+ xfs_hex_dump(p, XFS_CORRUPTION_DUMP_LEN);
+ xfs_error_report(tag, level, mp, filename, linenum, failaddr);
xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
}
@@ -347,19 +348,62 @@ xfs_corruption_error(
*/
void
xfs_verifier_error(
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ int error,
+ xfs_failaddr_t failaddr)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_failaddr_t fa;
+
+ fa = failaddr ? failaddr : __return_address;
+ __xfs_buf_ioerror(bp, error, fa);
xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- __return_address, bp->b_ops->name, bp->b_bn);
+ fa, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
- xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
- xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+ xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
+ XFS_CORRUPTION_DUMP_LEN);
+ xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
+ }
+
+ if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+ xfs_stack_trace();
+}
+
+/*
+ * Warnings for inode corruption problems. Don't bother with the stack
+ * trace unless the error level is turned up high.
+ */
+void
+xfs_inode_verifier_error(
+ struct xfs_inode *ip,
+ int error,
+ const char *name,
+ void *buf,
+ size_t bufsz,
+ xfs_failaddr_t failaddr)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_failaddr_t fa;
+ int sz;
+
+ fa = failaddr ? failaddr : __return_address;
+
+ xfs_alert(mp, "Metadata %s detected at %pS, inode 0x%llx %s",
+ error == -EFSBADCRC ? "CRC error" : "corruption",
+ fa, ip->i_ino, name);
+
+ xfs_alert(mp, "Unmount and run xfs_repair");
+
+ if (buf && xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
+ xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
+ sz);
+ xfs_hex_dump(buf, sz);
}
if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index ea816c1bf8db..7e728c5a46b8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -21,11 +21,16 @@
struct xfs_mount;
extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
- const char *filename, int linenum, void *ra);
+ const char *filename, int linenum,
+ xfs_failaddr_t failaddr);
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
- int linenum, void *ra);
-extern void xfs_verifier_error(struct xfs_buf *bp);
+ int linenum, xfs_failaddr_t failaddr);
+extern void xfs_verifier_error(struct xfs_buf *bp, int error,
+ xfs_failaddr_t failaddr);
+extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
+ const char *name, void *buf, size_t bufsz,
+ xfs_failaddr_t failaddr);
#define XFS_ERROR_REPORT(e, lvl, mp) \
xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
@@ -37,6 +42,9 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERRLEVEL_LOW 1
#define XFS_ERRLEVEL_HIGH 5
+/* Dump 128 bytes of any corrupt buffer */
+#define XFS_CORRUPTION_DUMP_LEN (128)
+
/*
* Macros to set EFSCORRUPTED & return/branch.
*/
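Together with the xfs_failaddr_t plumbing above, the reworked xfs_verifier_error() lets a verifier pinpoint the exact failing check. A minimal sketch of the intended calling convention, with xfs_foo_verify(), xfs_foo_hdr and XFS_FOO_MAGIC as illustrative names only (assuming the __this_address helper from this series):

	static xfs_failaddr_t
	xfs_foo_verify(
		struct xfs_buf		*bp)
	{
		struct xfs_foo_hdr	*hdr = bp->b_addr;

		if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
			return __this_address;	/* remember where we failed */
		return NULL;			/* buffer looks fine */
	}

	static void
	xfs_foo_read_verify(
		struct xfs_buf		*bp)
	{
		xfs_failaddr_t		fa;

		fa = xfs_foo_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

The reported address then lands in the "Metadata corruption detected at %pS" message instead of the generic __return_address of the error helper.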
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
return error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
- xfs_rmap_skip_owner_update(&oinfo);
+ xfs_rmap_any_owner_update(&oinfo);
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
extp = &efip->efi_format.efi_extents[i];
error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..8b4545623e25 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -49,83 +49,6 @@
* File system operations
*/
-int
-xfs_fs_geometry(
- xfs_mount_t *mp,
- xfs_fsop_geom_t *geo,
- int new_version)
-{
-
- memset(geo, 0, sizeof(*geo));
-
- geo->blocksize = mp->m_sb.sb_blocksize;
- geo->rtextsize = mp->m_sb.sb_rextsize;
- geo->agblocks = mp->m_sb.sb_agblocks;
- geo->agcount = mp->m_sb.sb_agcount;
- geo->logblocks = mp->m_sb.sb_logblocks;
- geo->sectsize = mp->m_sb.sb_sectsize;
- geo->inodesize = mp->m_sb.sb_inodesize;
- geo->imaxpct = mp->m_sb.sb_imax_pct;
- geo->datablocks = mp->m_sb.sb_dblocks;
- geo->rtblocks = mp->m_sb.sb_rblocks;
- geo->rtextents = mp->m_sb.sb_rextents;
- geo->logstart = mp->m_sb.sb_logstart;
- ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid));
- memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid));
- if (new_version >= 2) {
- geo->sunit = mp->m_sb.sb_unit;
- geo->swidth = mp->m_sb.sb_width;
- }
- if (new_version >= 3) {
- geo->version = XFS_FSOP_GEOM_VERSION;
- geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
- XFS_FSOP_GEOM_FLAGS_DIRV2 |
- (xfs_sb_version_hasattr(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
- (xfs_sb_version_hasquota(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
- (xfs_sb_version_hasalign(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
- (xfs_sb_version_hasdalign(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
- (xfs_sb_version_hasextflgbit(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
- (xfs_sb_version_hassector(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
- (xfs_sb_version_hasasciici(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
- (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
- (xfs_sb_version_hasattr2(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
- (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
- (xfs_sb_version_hascrc(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
- (xfs_sb_version_hasftype(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
- (xfs_sb_version_hasfinobt(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
- (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
- (xfs_sb_version_hasrmapbt(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_RMAPBT : 0) |
- (xfs_sb_version_hasreflink(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_REFLINK : 0);
- geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
- mp->m_sb.sb_logsectsize : BBSIZE;
- geo->rtsectsize = mp->m_sb.sb_blocksize;
- geo->dirblocksize = mp->m_dir_geo->blksize;
- }
- if (new_version >= 4) {
- geo->flags |=
- (xfs_sb_version_haslogv2(&mp->m_sb) ?
- XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
- geo->logsunit = mp->m_sb.sb_logsunit;
- }
- return 0;
-}
-
static struct xfs_buf *
xfs_growfs_get_hdr_buf(
struct xfs_mount *mp,
@@ -571,6 +494,11 @@ xfs_growfs_data_private(
* this doesn't actually exist in the rmap btree.
*/
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+ error = xfs_rmap_free(tp, bp, agno,
+ be32_to_cpu(agf->agf_length) - new,
+ new, &oinfo);
+ if (error)
+ goto error0;
error = xfs_free_extent(tp,
XFS_AGB_TO_FSB(mp, agno,
be32_to_cpu(agf->agf_length) - new),
@@ -950,7 +878,7 @@ xfs_do_force_shutdown(
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
xfs_notice(mp,
- "%s(0x%x) called from line %d of file %s. Return address = 0x%p",
+ "%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
__func__, flags, lnnum, fname, __return_address);
}
/*
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 2954c13a3acd..20484ed5e919 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -18,7 +18,6 @@
#ifndef __XFS_FSOPS_H__
#define __XFS_FSOPS_H__
-extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..d53a316162d6 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -37,6 +37,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/iversion.h>
/*
* Allocate and initialise an xfs_inode.
@@ -293,15 +294,17 @@ xfs_reinit_inode(
int error;
uint32_t nlink = inode->i_nlink;
uint32_t generation = inode->i_generation;
- uint64_t version = inode->i_version;
+ uint64_t version = inode_peek_iversion(inode);
umode_t mode = inode->i_mode;
+ dev_t dev = inode->i_rdev;
error = inode_init_always(mp->m_super, inode);
set_nlink(inode, nlink);
inode->i_generation = generation;
- inode->i_version = version;
+ inode_set_iversion_queried(inode, version);
inode->i_mode = mode;
+ inode->i_rdev = dev;
return error;
}
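The <linux/iversion.h> conversion replaces raw i_version stores with helpers that also manage the VFS "queried" flag. The save/restore pairing above preserves the change counter across inode reinitialisation; a condensed sketch of the idiom, assuming the 4.16 iversion API (example_preserve_iversion is an illustrative name):

	#include <linux/iversion.h>

	static void
	example_preserve_iversion(
		struct inode		*inode)
	{
		/* Snapshot the raw counter before tearing the inode down. */
		uint64_t		version = inode_peek_iversion(inode);

		/* ... reinitialise the VFS inode here ... */

		/*
		 * Restore it as "already queried" so the very next change
		 * is guaranteed to bump the counter for observers.
		 */
		inode_set_iversion_queried(inode, version);
	}

Freshly allocated inodes instead seed the counter with inode_set_iversion(inode, 1), as the xfs_ialloc() hunk later in this patch does.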
@@ -473,6 +476,11 @@ xfs_iget_cache_miss(
if (error)
goto out_destroy;
+ if (!xfs_inode_verify_forks(ip)) {
+ error = -EFSCORRUPTED;
+ goto out_destroy;
+ }
+
trace_xfs_iget_miss(ip);
if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
@@ -870,7 +878,7 @@ xfs_eofblocks_worker(
* based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
* (We'll just piggyback on the post-EOF prealloc space workqueue.)
*/
-STATIC void
+void
xfs_queue_cowblocks(
struct xfs_mount *mp)
{
@@ -1536,8 +1544,23 @@ xfs_inode_free_quota_eofblocks(
return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
}
+static inline unsigned long
+xfs_iflag_for_tag(
+ int tag)
+{
+ switch (tag) {
+ case XFS_ICI_EOFBLOCKS_TAG:
+ return XFS_IEOFBLOCKS;
+ case XFS_ICI_COWBLOCKS_TAG:
+ return XFS_ICOWBLOCKS;
+ default:
+ ASSERT(0);
+ return 0;
+ }
+}
+
static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
xfs_inode_t *ip,
void (*execute)(struct xfs_mount *mp),
void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1575,10 @@ __xfs_inode_set_eofblocks_tag(
* Don't bother locking the AG and looking up in the radix trees
* if we already know that we have the tag set.
*/
- if (ip->i_flags & XFS_IEOFBLOCKS)
+ if (ip->i_flags & xfs_iflag_for_tag(tag))
return;
spin_lock(&ip->i_flags_lock);
- ip->i_flags |= XFS_IEOFBLOCKS;
+ ip->i_flags |= xfs_iflag_for_tag(tag);
spin_unlock(&ip->i_flags_lock);
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1610,13 @@ xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_set_eofblocks_tag(ip);
- return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+ return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
trace_xfs_perag_set_eofblocks,
XFS_ICI_EOFBLOCKS_TAG);
}
static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
xfs_inode_t *ip,
void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
int error, unsigned long caller_ip),
@@ -1603,7 +1626,7 @@ __xfs_inode_clear_eofblocks_tag(
struct xfs_perag *pag;
spin_lock(&ip->i_flags_lock);
- ip->i_flags &= ~XFS_IEOFBLOCKS;
+ ip->i_flags &= ~xfs_iflag_for_tag(tag);
spin_unlock(&ip->i_flags_lock);
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,33 +1653,20 @@ xfs_inode_clear_eofblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_clear_eofblocks_tag(ip);
- return __xfs_inode_clear_eofblocks_tag(ip,
+ return __xfs_inode_clear_blocks_tag(ip,
trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
}
/*
- * Automatic CoW Reservation Freeing
- *
- * These functions automatically garbage collect leftover CoW reservations
- * that were made on behalf of a cowextsize hint when we start to run out
- * of quota or when the reservations sit around for too long. If the file
- * has dirty pages or is undergoing writeback, its CoW reservations will
- * be retained.
- *
- * The actual garbage collection piggybacks off the same code that runs
- * the speculative EOF preallocation garbage collector.
+ * Set ourselves up to free CoW blocks from this file. If it's already clean
+ * then we can bail out quickly, but otherwise we must back off if the file
+ * is undergoing some kind of write.
*/
-STATIC int
-xfs_inode_free_cowblocks(
+static bool
+xfs_prep_free_cowblocks(
struct xfs_inode *ip,
- int flags,
- void *args)
+ struct xfs_ifork *ifp)
{
- int ret;
- struct xfs_eofblocks *eofb = args;
- int match;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
-
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1664,7 +1674,7 @@ xfs_inode_free_cowblocks(
if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
trace_xfs_inode_free_cowblocks_invalid(ip);
xfs_inode_clear_cowblocks_tag(ip);
- return 0;
+ return false;
}
/*
@@ -1675,6 +1685,35 @@ xfs_inode_free_cowblocks(
mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
atomic_read(&VFS_I(ip)->i_dio_count))
+ return false;
+
+ return true;
+}
+
+/*
+ * Automatic CoW Reservation Freeing
+ *
+ * These functions automatically garbage collect leftover CoW reservations
+ * that were made on behalf of a cowextsize hint when we start to run out
+ * of quota or when the reservations sit around for too long. If the file
+ * has dirty pages or is undergoing writeback, its CoW reservations will
+ * be retained.
+ *
+ * The actual garbage collection piggybacks off the same code that runs
+ * the speculative EOF preallocation garbage collector.
+ */
+STATIC int
+xfs_inode_free_cowblocks(
+ struct xfs_inode *ip,
+ int flags,
+ void *args)
+{
+ struct xfs_eofblocks *eofb = args;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ int match;
+ int ret = 0;
+
+ if (!xfs_prep_free_cowblocks(ip, ifp))
return 0;
if (eofb) {
@@ -1695,7 +1734,12 @@ xfs_inode_free_cowblocks(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
+ /*
+ * Check again; nobody else should be able to dirty blocks or change
+ * the reflink iflag now that we have the first two locks held.
+ */
+ if (xfs_prep_free_cowblocks(ip, ifp))
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
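Rechecking xfs_prep_free_cowblocks() under the IOLOCK and MMAPLOCK closes the window in which a write could dirty the file between the cheap unlocked test and lock acquisition. This is the usual optimistic-check idiom; a generic sketch (the helper names are illustrative):

	if (!prep_check(ip))			/* cheap but racy */
		return 0;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

	if (prep_check(ip))			/* authoritative under locks */
		ret = do_work(ip);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);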
@@ -1724,7 +1768,7 @@ xfs_inode_set_cowblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_set_cowblocks_tag(ip);
- return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+ return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
trace_xfs_perag_set_cowblocks,
XFS_ICI_COWBLOCKS_TAG);
}
@@ -1734,6 +1778,6 @@ xfs_inode_clear_cowblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_clear_cowblocks_tag(ip);
- return __xfs_inode_clear_eofblocks_tag(ip,
+ return __xfs_inode_clear_blocks_tag(ip,
trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 61d1cb7dc10d..604ee384a00a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/log2.h>
+#include <linux/iversion.h>
#include "xfs.h"
#include "xfs_fs.h"
@@ -546,23 +547,36 @@ again:
/*
* xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
- * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
- * lock more than one at a time, lockdep will report false positives saying we
- * have violated locking orders.
+ * the mmaplock or the ilock, but not more than one type at a time. If we lock
+ * more than one at a time, lockdep will report false positives saying we have
+ * violated locking orders. The iolock must be double-locked separately since
+ * we use i_rwsem for that. We now support taking one lock EXCL and the other
+ * SHARED.
*/
void
xfs_lock_two_inodes(
- xfs_inode_t *ip0,
- xfs_inode_t *ip1,
- uint lock_mode)
+ struct xfs_inode *ip0,
+ uint ip0_mode,
+ struct xfs_inode *ip1,
+ uint ip1_mode)
{
- xfs_inode_t *temp;
+ struct xfs_inode *temp;
+ uint mode_temp;
int attempts = 0;
xfs_log_item_t *lp;
- ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
- if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
- ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(hweight32(ip0_mode) == 1);
+ ASSERT(hweight32(ip1_mode) == 1);
+ ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip0_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+ ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
+ !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
ASSERT(ip0->i_ino != ip1->i_ino);
@@ -570,10 +584,13 @@ xfs_lock_two_inodes(
temp = ip0;
ip0 = ip1;
ip1 = temp;
+ mode_temp = ip0_mode;
+ ip0_mode = ip1_mode;
+ ip1_mode = mode_temp;
}
again:
- xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+ xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
/*
* If the first lock we have locked is in the AIL, we must TRY to get
@@ -582,18 +599,17 @@ xfs_lock_two_inodes(
*/
lp = (xfs_log_item_t *)ip0->i_itemp;
if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
- if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
- xfs_iunlock(ip0, lock_mode);
+ if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
+ xfs_iunlock(ip0, ip0_mode);
if ((++attempts % 5) == 0)
delay(1); /* Don't just spin the CPU */
goto again;
}
} else {
- xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+ xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
}
}
-
void
__xfs_iflock(
struct xfs_inode *ip)
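With per-inode modes, the helper above can now take the same lock class at different strengths on the two inodes while still sorting by inode number for a stable lock order. The reflink path later in this patch relies on exactly that:

	/* Shared on the source, exclusive on the destination. */
	xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED,
			    dest, XFS_MMAPLOCK_EXCL);

The asserts still forbid mixing lock classes (MMAPLOCK on one inode, ILOCK on the other) in a single call, since lockdep cannot validate that ordering.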
@@ -749,7 +765,6 @@ xfs_ialloc(
xfs_nlink_t nlink,
dev_t rdev,
prid_t prid,
- int okalloc,
xfs_buf_t **ialloc_context,
xfs_inode_t **ipp)
{
@@ -765,7 +780,7 @@ xfs_ialloc(
* Call the space management code to pick
* the on-disk inode to be allocated.
*/
- error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+ error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
ialloc_context, &ino);
if (error)
return error;
@@ -833,7 +848,7 @@ xfs_ialloc(
ip->i_d.di_flags = 0;
if (ip->i_d.di_version == 3) {
- inode->i_version = 1;
+ inode_set_iversion(inode, 1);
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
@@ -957,7 +972,6 @@ xfs_dir_ialloc(
xfs_nlink_t nlink,
dev_t rdev,
prid_t prid, /* project id */
- int okalloc, /* ok to allocate new space */
xfs_inode_t **ipp, /* pointer to inode; it will be
locked. */
int *committed)
@@ -988,8 +1002,8 @@ xfs_dir_ialloc(
* transaction commit so that no other process can steal
* the inode(s) that we've just allocated.
*/
- code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
- &ialloc_context, &ip);
+ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
+ &ip);
/*
* Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1075,7 @@ xfs_dir_ialloc(
* this call should always succeed.
*/
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
- okalloc, &ialloc_context, &ip);
+ &ialloc_context, &ip);
/*
* If we get an error at this point, return to the caller
@@ -1182,11 +1196,6 @@ xfs_create(
xfs_flush_inodes(mp);
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
}
- if (error == -ENOSPC) {
- /* No space at all so try a "no-allocation" reservation */
- resblks = 0;
- error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
- }
if (error)
goto out_release_inode;
@@ -1203,19 +1212,13 @@ xfs_create(
if (error)
goto out_trans_cancel;
- if (!resblks) {
- error = xfs_dir_canenter(tp, dp, name);
- if (error)
- goto out_trans_cancel;
- }
-
/*
* A newly created regular or special file just has one directory
entry pointing to them, but a directory also has the "." entry
* pointing to itself.
*/
- error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
- prid, resblks > 0, &ip, NULL);
+ error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
+ NULL);
if (error)
goto out_trans_cancel;
@@ -1340,11 +1343,6 @@ xfs_create_tmpfile(
tres = &M_RES(mp)->tr_create_tmpfile;
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
- if (error == -ENOSPC) {
- /* No space at all so try a "no-allocation" reservation */
- resblks = 0;
- error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
- }
if (error)
goto out_release_inode;
@@ -1353,8 +1351,7 @@ xfs_create_tmpfile(
if (error)
goto out_trans_cancel;
- error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
- prid, resblks > 0, &ip, NULL);
+ error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
if (error)
goto out_trans_cancel;
@@ -1440,7 +1437,7 @@ xfs_link(
if (error)
goto std_return;
- xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
@@ -1506,6 +1503,24 @@ xfs_link(
return error;
}
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+ struct xfs_inode *ip)
+{
+ struct xfs_ifork *dfork;
+ struct xfs_ifork *cfork;
+
+ if (!xfs_is_reflink_inode(ip))
+ return;
+ dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ if (cfork->if_bytes == 0)
+ xfs_inode_clear_cowblocks_tag(ip);
+}
+
/*
* Free up the underlying blocks past new_size. The new size must be smaller
* than the current size. This routine can be used both for the attribute and
@@ -1602,15 +1617,7 @@ xfs_itruncate_extents(
if (error)
goto out;
- /*
- * Clear the reflink flag if there are no data fork blocks and
- * there are no extents staged in the cow fork.
- */
- if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
- if (ip->i_d.di_nblocks == 0)
- ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
- xfs_inode_clear_cowblocks_tag(ip);
- }
+ xfs_itruncate_clear_reflink_flags(ip);
/*
* Always re-log the inode so that our permanent transaction can keep
@@ -2223,7 +2230,7 @@ xfs_ifree_cluster(
xfs_buf_t *bp;
xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
struct xfs_perag *pag;
xfs_ino_t inum;
@@ -2281,8 +2288,7 @@ xfs_ifree_cluster(
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
- lip = bp->b_fspriv;
- while (lip) {
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
ASSERT(iip->ili_logged == 1);
@@ -2292,7 +2298,6 @@ xfs_ifree_cluster(
&iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
}
- lip = lip->li_bio_list;
}
@@ -2401,6 +2406,24 @@ retry:
}
/*
+ * Free any local-format buffers sitting around before we reset to
+ * extents format.
+ */
+static inline void
+xfs_ifree_local_data(
+ struct xfs_inode *ip,
+ int whichfork)
+{
+ struct xfs_ifork *ifp;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ return;
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+}
+
+/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
* no pages associated with it. This routine also assumes that
@@ -2437,8 +2460,12 @@ xfs_ifree(
if (error)
return error;
+ xfs_ifree_local_data(ip, XFS_DATA_FORK);
+ xfs_ifree_local_data(ip, XFS_ATTR_FORK);
+
VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
+ ip->i_d.di_flags2 = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
@@ -2574,7 +2601,7 @@ xfs_remove(
goto std_return;
}
- xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -3467,6 +3494,36 @@ abort_out:
return error;
}
+/*
+ * If there are inline format data / attr forks attached to this inode,
+ * make sure they're not corrupt.
+ */
+bool
+xfs_inode_verify_forks(
+ struct xfs_inode *ip)
+{
+ struct xfs_ifork *ifp;
+ xfs_failaddr_t fa;
+
+ fa = xfs_ifork_verify_data(ip, &xfs_default_ifork_ops);
+ if (fa) {
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork",
+ ifp->if_u1.if_data, ifp->if_bytes, fa);
+ return false;
+ }
+
+ fa = xfs_ifork_verify_attr(ip, &xfs_default_ifork_ops);
+ if (fa) {
+ ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork",
+ ifp ? ifp->if_u1.if_data : NULL,
+ ifp ? ifp->if_bytes : 0, fa);
+ return false;
+ }
+ return true;
+}
+
STATIC int
xfs_iflush_int(
struct xfs_inode *ip,
@@ -3489,7 +3546,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
+ "%s: Bad inode %Lu magic number 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
goto corrupt_out;
}
@@ -3499,7 +3556,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_IFLUSH_3)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad regular inode %Lu, ptr 0x%p",
+ "%s: Bad regular inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto corrupt_out;
}
@@ -3510,7 +3567,7 @@ xfs_iflush_int(
(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
mp, XFS_ERRTAG_IFLUSH_4)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad directory inode %Lu, ptr 0x%p",
+ "%s: Bad directory inode %Lu, ptr "PTR_FMT,
__func__, ip->i_ino, ip);
goto corrupt_out;
}
@@ -3519,7 +3576,7 @@ xfs_iflush_int(
ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: detected corrupt incore inode %Lu, "
- "total extents = %d, nblocks = %Ld, ptr 0x%p",
+ "total extents = %d, nblocks = %Ld, ptr "PTR_FMT,
__func__, ip->i_ino,
ip->i_d.di_nextents + ip->i_d.di_anextents,
ip->i_d.di_nblocks, ip);
@@ -3528,7 +3585,7 @@ xfs_iflush_int(
if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
mp, XFS_ERRTAG_IFLUSH_6)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
+ "%s: bad inode %Lu, forkoff 0x%x, ptr "PTR_FMT,
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
goto corrupt_out;
}
@@ -3545,10 +3602,8 @@ xfs_iflush_int(
if (ip->i_d.di_version < 3)
ip->i_d.di_flushiter++;
- /* Check the inline directory data. */
- if (S_ISDIR(VFS_I(ip)->i_mode) &&
- ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
- xfs_dir2_sf_verify(ip))
+ /* Check the inline fork data before we write out. */
+ if (!xfs_inode_verify_forks(ip))
goto corrupt_out;
/*
@@ -3611,7 +3666,7 @@ xfs_iflush_int(
/* generate the checksum. */
xfs_dinode_calc_crc(mp, dip);
- ASSERT(bp->b_fspriv != NULL);
+ ASSERT(!list_empty(&bp->b_li_list));
ASSERT(bp->b_iodone != NULL);
return 0;
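xfs_inode_verify_forks() gives inode readers a single entry point for inline fork validation; this patch calls it from iget, log recovery's owner change, and iflush. A sketch of the caller side, following the xfs_iget_cache_miss() hunk earlier (the preceding xfs_iread() call is assumed from the surrounding 4.15 code, not shown in this diff):

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	/* Reject inodes whose inline data/attr forks fail verification. */
	if (!xfs_inode_verify_forks(ip)) {
		error = -EFSCORRUPTED;
		goto out_destroy;
	}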
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cc13c3763721..3e8dc990d41c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
* log recovery to replay a bmap operation on the inode.
*/
#define XFS_IRECOVERY (1 << 11)
+#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */
/*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -422,13 +423,14 @@ void xfs_iunpin_wait(xfs_inode_t *);
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
-void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
+void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
+ struct xfs_inode *ip1, uint ip1_mode);
xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
- xfs_nlink_t, dev_t, prid_t, int,
+ xfs_nlink_t, dev_t, prid_t,
struct xfs_inode **, int *);
/* from xfs_file.c */
@@ -490,4 +492,6 @@ extern struct kmem_zone *xfs_inode_zone;
/* The default CoW extent size hint. */
#define XFS_DEFAULT_COWEXTSZ_HINT 32
+bool xfs_inode_verify_forks(struct xfs_inode *ip);
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6ee5c3bf19ad..d5037f060d6f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -30,6 +30,7 @@
#include "xfs_buf_item.h"
#include "xfs_log.h"
+#include <linux/iversion.h>
kmem_zone_t *xfs_ili_zone; /* inode log item zone */
@@ -354,7 +355,7 @@ xfs_inode_to_log_dinode(
to->di_next_unlinked = NULLAGINO;
if (from->di_version == 3) {
- to->di_changecount = inode->i_version;
+ to->di_changecount = inode_peek_iversion(inode);
to->di_crtime.t_sec = from->di_crtime.t_sec;
to->di_crtime.t_nsec = from->di_crtime.t_nsec;
to->di_flags2 = from->di_flags2;
@@ -521,7 +522,7 @@ xfs_inode_item_push(
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
- if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list))
+ if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_unlock(bp);
@@ -712,37 +713,23 @@ xfs_iflush_done(
struct xfs_log_item *lip)
{
struct xfs_inode_log_item *iip;
- struct xfs_log_item *blip;
- struct xfs_log_item *next;
- struct xfs_log_item *prev;
+ struct xfs_log_item *blip, *n;
struct xfs_ail *ailp = lip->li_ailp;
int need_ail = 0;
+ LIST_HEAD(tmp);
/*
* Scan the buffer IO completions for other inodes being completed and
* attach them to the current inode log item.
*/
- blip = bp->b_fspriv;
- prev = NULL;
- while (blip != NULL) {
- if (blip->li_cb != xfs_iflush_done) {
- prev = blip;
- blip = blip->li_bio_list;
- continue;
- }
- /* remove from list */
- next = blip->li_bio_list;
- if (!prev) {
- bp->b_fspriv = next;
- } else {
- prev->li_bio_list = next;
- }
+ list_add_tail(&lip->li_bio_list, &tmp);
- /* add to current list */
- blip->li_bio_list = lip->li_bio_list;
- lip->li_bio_list = blip;
+ list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
+ if (blip->li_cb != xfs_iflush_done)
+ continue;
+ list_move_tail(&blip->li_bio_list, &tmp);
/*
* while we have the item, do the unlocked check for needing
* the AIL lock.
@@ -751,8 +738,6 @@ xfs_iflush_done(
if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
(blip->li_flags & XFS_LI_FAILED))
need_ail++;
-
- blip = next;
}
/* make sure we capture the state of the initial inode. */
@@ -775,7 +760,7 @@ xfs_iflush_done(
/* this is an opencoded batch version of xfs_trans_ail_delete */
spin_lock(&ailp->xa_lock);
- for (blip = lip; blip; blip = blip->li_bio_list) {
+ list_for_each_entry(blip, &tmp, li_bio_list) {
if (INODE_ITEM(blip)->ili_logged &&
blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
mlip_changed |= xfs_ail_delete_one(ailp, blip);
@@ -801,15 +786,14 @@ xfs_iflush_done(
* ili_last_fields bits now that we know that the data corresponding to
* them is safely on disk.
*/
- for (blip = lip; blip; blip = next) {
- next = blip->li_bio_list;
- blip->li_bio_list = NULL;
-
+ list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
+ list_del_init(&blip->li_bio_list);
iip = INODE_ITEM(blip);
iip->ili_logged = 0;
iip->ili_last_fields = 0;
xfs_ifunlock(iip->ili_inode);
}
+ list_del(&tmp);
}
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 20dc65fef6a4..89fb1eb80aae 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -45,6 +45,7 @@
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
#include "scrub/xfs_scrub.h"
+#include "xfs_sb.h"
#include <linux/capability.h>
#include <linux/cred.h>
@@ -809,7 +810,7 @@ xfs_ioc_fsgeometry_v1(
xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, &fsgeo, 3);
+ error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
if (error)
return error;
@@ -831,7 +832,7 @@ xfs_ioc_fsgeometry(
xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, &fsgeo, 4);
+ error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 4);
if (error)
return error;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 35c79e246fde..10fbde359649 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -37,6 +37,7 @@
#include "xfs_ioctl.h"
#include "xfs_ioctl32.h"
#include "xfs_trace.h"
+#include "xfs_sb.h"
#define _NATIVE_IOC(cmd, type) \
_IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
@@ -66,7 +67,7 @@ xfs_compat_ioc_fsgeometry_v1(
xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, &fsgeo, 3);
+ error = xfs_fs_geometry(&mp->m_sb, &fsgeo, 3);
if (error)
return error;
/* The 32-bit variant simply has some padding at the end */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 33eb4fb2e3fd..66e1edbfb2b2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
}
ASSERT(offset <= mp->m_super->s_maxbytes);
- if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+ if (offset > mp->m_super->s_maxbytes - length)
length = mp->m_super->s_maxbytes - offset;
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + length);
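The rewritten clamp is equivalent for in-range values but cannot overflow: offset and length are each bounded, yet offset + length may exceed the type's range, which for signed loff_t arithmetic is undefined behavior. A standalone illustration (assuming length has already been capped at the maximum, as it is here):

	#include <limits.h>
	#include <stdio.h>

	int main(void)
	{
		long long max = LLONG_MAX;	/* stand-in for s_maxbytes */
		long long offset = max - 10;
		long long length = 100;

		/* "offset + length > max" would overflow; the subtraction
		 * form stays in range because length <= max. */
		if (offset > max - length)
			length = max - offset;

		printf("clamped length = %lld\n", length);	/* 10 */
		return 0;
	}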
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
- &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+ &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
xfs_iunlock(ip, lockmode);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 99562ec0de56..bee51a14a906 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -285,8 +285,22 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
#define XFS_IS_REALTIME_INODE(ip) \
(((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \
(ip)->i_mount->m_rtdev_targp)
+#define XFS_IS_REALTIME_MOUNT(mp) ((mp)->m_rtdev_targp ? 1 : 0)
#else
#define XFS_IS_REALTIME_INODE(ip) (0)
+#define XFS_IS_REALTIME_MOUNT(mp) (0)
+#endif
+
+/*
+ * Starting in Linux 4.15, the %p (raw pointer value) printk modifier
+ * prints a hashed version of the pointer to avoid leaking kernel
+ * pointers into dmesg. If we're trying to debug the kernel we want the
+ * raw values, so override this behavior as best we can.
+ */
+#ifdef DEBUG
+# define PTR_FMT "%px"
+#else
+# define PTR_FMT "%p"
#endif
#endif /* __XFS_LINUX__ */
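PTR_FMT relies on C's compile-time concatenation of adjacent string literals, so it can sit in the middle of a format string with no runtime cost. Taking one of the converted call sites in this patch as an example:

	xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed", __func__, dqp);

	/* With DEBUG set, the preprocessor output is equivalent to: */
	xfs_warn(mp, "%s: dquot %px flush failed", __func__, dqp);

Non-DEBUG builds keep %p and therefore keep the hashed, non-leaking form.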
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 38d4227895ae..3e5ba1ecc080 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -781,17 +781,17 @@ xfs_log_mount_finish(
* something to an unlinked inode, the irele won't cause
* premature truncation and freeing of the inode, which results
* in log recovery failure. We have to evict the unreferenced
- * lru inodes after clearing MS_ACTIVE because we don't
+ * lru inodes after clearing SB_ACTIVE because we don't
* otherwise clean up the lru if there's a subsequent failure in
* xfs_mountfs, which leads to us leaking the inodes if nothing
* else (e.g. quotacheck) references the inodes before the
* mount failure occurs.
*/
- mp->m_super->s_flags |= MS_ACTIVE;
+ mp->m_super->s_flags |= SB_ACTIVE;
error = xlog_recover_finish(mp->m_log);
if (!error)
xfs_log_work_queue(mp);
- mp->m_super->s_flags &= ~MS_ACTIVE;
+ mp->m_super->s_flags &= ~SB_ACTIVE;
evict_inodes(mp->m_super);
/*
@@ -1047,6 +1047,7 @@ xfs_log_item_init(
INIT_LIST_HEAD(&item->li_ail);
INIT_LIST_HEAD(&item->li_cil);
+ INIT_LIST_HEAD(&item->li_bio_list);
}
/*
@@ -1242,7 +1243,7 @@ xlog_space_left(
static void
xlog_iodone(xfs_buf_t *bp)
{
- struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog_in_core *iclog = bp->b_log_item;
struct xlog *l = iclog->ic_log;
int aborted = 0;
@@ -1773,7 +1774,7 @@ STATIC int
xlog_bdstrat(
struct xfs_buf *bp)
{
- struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog_in_core *iclog = bp->b_log_item;
xfs_buf_lock(bp);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -1919,7 +1920,7 @@ xlog_sync(
}
bp->b_io_length = BTOBB(count);
- bp->b_fspriv = iclog;
+ bp->b_log_item = iclog;
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
@@ -1958,7 +1959,7 @@ xlog_sync(
XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
- bp->b_fspriv = iclog;
+ bp->b_log_item = iclog;
bp->b_flags &= ~XBF_FLUSH;
bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
@@ -2117,7 +2118,9 @@ xlog_print_trans(
/* dump core transaction and ticket info */
xfs_warn(mp, "transaction summary:");
- xfs_warn(mp, " flags = 0x%x", tp->t_flags);
+ xfs_warn(mp, " log res = %d", tp->t_log_res);
+ xfs_warn(mp, " log count = %d", tp->t_log_count);
+ xfs_warn(mp, " flags = 0x%x", tp->t_flags);
xlog_print_tic_res(mp, tp->t_ticket);
@@ -2242,7 +2245,7 @@ xlog_write_setup_ophdr(
break;
default:
xfs_warn(log->l_mp,
- "Bad XFS transaction clientid 0x%x in ticket 0x%p",
+ "Bad XFS transaction clientid 0x%x in ticket "PTR_FMT,
ophdr->oh_clientid, ticket);
return NULL;
}
@@ -3924,7 +3927,7 @@ xlog_verify_iclog(
}
if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
xfs_warn(log->l_mp,
- "%s: invalid clientid %d op 0x%p offset 0x%lx",
+ "%s: invalid clientid %d op "PTR_FMT" offset 0x%lx",
__func__, clientid, ophead,
(unsigned long)field_offset);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 87b1c331f9eb..00240c9ee72e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -24,6 +24,7 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
@@ -399,9 +400,9 @@ xlog_recover_iodone(
* On v5 supers, a bli could be attached to update the metadata LSN.
* Clean it up.
*/
- if (bp->b_fspriv)
+ if (bp->b_log_item)
xfs_buf_item_relse(bp);
- ASSERT(bp->b_fspriv == NULL);
+ ASSERT(bp->b_log_item == NULL);
bp->b_iodone = NULL;
xfs_buf_ioend(bp);
@@ -2217,7 +2218,7 @@ xlog_recover_do_inode_buffer(
next_unlinked_offset - reg_buf_offset;
if (unlikely(*logged_nextp == 0)) {
xfs_alert(mp,
- "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
+ "Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
"Trying to replay bad (0) inode di_next_unlinked field.",
item, bp);
XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
@@ -2629,7 +2630,7 @@ xlog_recover_validate_buf_type(
ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_item_init(bp, mp);
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
bip->bli_item.li_lsn = current_lsn;
}
}
@@ -2651,7 +2652,7 @@ xlog_recover_do_reg_buffer(
int i;
int bit;
int nbits;
- int error;
+ xfs_failaddr_t fa;
trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
@@ -2686,7 +2687,7 @@ xlog_recover_do_reg_buffer(
* the first dquot in the buffer should do. XXXThis is
* probably a good thing to do for other buf types also.
*/
- error = 0;
+ fa = NULL;
if (buf_f->blf_flags &
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
if (item->ri_buf[i].i_addr == NULL) {
@@ -2700,11 +2701,14 @@ xlog_recover_do_reg_buffer(
item->ri_buf[i].i_len, __func__);
goto next;
}
- error = xfs_dqcheck(mp, item->ri_buf[i].i_addr,
- -1, 0, XFS_QMOPT_DOWARN,
- "dquot_buf_recover");
- if (error)
+ fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
+ -1, 0, 0);
+ if (fa) {
+ xfs_alert(mp,
+ "dquot corrupt at %pS trying to replay into block 0x%llx",
+ fa, bp->b_bn);
goto next;
+ }
}
memcpy(xfs_buf_offset(bp,
@@ -2956,6 +2960,10 @@ xfs_recover_inode_owner_change(
if (error)
goto out_free_ip;
+ if (!xfs_inode_verify_forks(ip)) {
+ error = -EFSCORRUPTED;
+ goto out_free_ip;
+ }
if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
@@ -3041,7 +3049,7 @@ xlog_recover_inode_pass2(
*/
if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
xfs_alert(mp,
- "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
+ "%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
__func__, dip, bp, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
XFS_ERRLEVEL_LOW, mp);
@@ -3051,7 +3059,7 @@ xlog_recover_inode_pass2(
ldip = item->ri_buf[1].i_addr;
if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
- "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
+ "%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
__func__, item, in_f->ilf_ino);
XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
XFS_ERRLEVEL_LOW, mp);
@@ -3109,8 +3117,8 @@ xlog_recover_inode_pass2(
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
- "%s: Bad regular inode log record, rec ptr 0x%p, "
- "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+ "%s: Bad regular inode log record, rec ptr "PTR_FMT", "
+ "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
error = -EFSCORRUPTED;
goto out_release;
@@ -3122,8 +3130,8 @@ xlog_recover_inode_pass2(
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
- "%s: Bad dir inode log record, rec ptr 0x%p, "
- "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
+ "%s: Bad dir inode log record, rec ptr "PTR_FMT", "
+ "ino ptr = "PTR_FMT", ino bp = "PTR_FMT", ino %Ld",
__func__, item, dip, bp, in_f->ilf_ino);
error = -EFSCORRUPTED;
goto out_release;
@@ -3133,8 +3141,8 @@ xlog_recover_inode_pass2(
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
- "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
- "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
+ "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+ "dino bp "PTR_FMT", ino %Ld, total extents = %d, nblocks = %Ld",
__func__, item, dip, bp, in_f->ilf_ino,
ldip->di_nextents + ldip->di_anextents,
ldip->di_nblocks);
@@ -3145,8 +3153,8 @@ xlog_recover_inode_pass2(
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
- "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
- "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
+ "%s: Bad inode log record, rec ptr "PTR_FMT", dino ptr "PTR_FMT", "
+ "dino bp "PTR_FMT", ino %Ld, forkoff 0x%x", __func__,
item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
error = -EFSCORRUPTED;
goto out_release;
@@ -3156,7 +3164,7 @@ xlog_recover_inode_pass2(
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
- "%s: Bad inode log record length %d, rec ptr 0x%p",
+ "%s: Bad inode log record length %d, rec ptr "PTR_FMT,
__func__, item->ri_buf[1].i_len, item);
error = -EFSCORRUPTED;
goto out_release;
@@ -3302,6 +3310,7 @@ xlog_recover_dquot_pass2(
xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
struct xfs_disk_dquot *ddq, *recddq;
+ xfs_failaddr_t fa;
int error;
xfs_dq_logformat_t *dq_f;
uint type;
@@ -3344,10 +3353,12 @@ xlog_recover_dquot_pass2(
*/
dq_f = item->ri_buf[0].i_addr;
ASSERT(dq_f);
- error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
- "xlog_recover_dquot_pass2 (log copy)");
- if (error)
+ fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0);
+ if (fa) {
+ xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
+ dq_f->qlf_id, fa);
return -EIO;
+ }
ASSERT(dq_f->qlf_len == 1);
/*
@@ -4716,7 +4727,8 @@ STATIC int
xlog_recover_process_cui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_cui_log_item *cuip;
int error;
@@ -4729,7 +4741,7 @@ xlog_recover_process_cui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_cui_recover(mp, cuip);
+ error = xfs_cui_recover(mp, cuip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4756,7 +4768,8 @@ STATIC int
xlog_recover_process_bui(
struct xfs_mount *mp,
struct xfs_ail *ailp,
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ struct xfs_defer_ops *dfops)
{
struct xfs_bui_log_item *buip;
int error;
@@ -4769,7 +4782,7 @@ xlog_recover_process_bui(
return 0;
spin_unlock(&ailp->xa_lock);
- error = xfs_bui_recover(mp, buip);
+ error = xfs_bui_recover(mp, buip, dfops);
spin_lock(&ailp->xa_lock);
return error;
@@ -4805,6 +4818,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
}
}
+/* Take all the collected deferred ops and finish them in order. */
+static int
+xlog_finish_defer_ops(
+ struct xfs_mount *mp,
+ struct xfs_defer_ops *dfops)
+{
+ struct xfs_trans *tp;
+ int64_t freeblks;
+ uint resblks;
+ int error;
+
+ /*
+ * We're finishing the defer_ops that accumulated as a result of
+ * recovering unfinished intent items during log recovery. We
+ * reserve an itruncate transaction because it is the largest
+ * permanent transaction type. Since we're the only user of the fs
+ * right now, take 93% (15/16) of the available free blocks. Use
+ * weird math to avoid a 64-bit division.
+ */
+ freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ if (freeblks <= 0)
+ return -ENOSPC;
+ resblks = min_t(int64_t, UINT_MAX, freeblks);
+ resblks = (resblks * 15) >> 4;
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks,
+ 0, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ error = xfs_defer_finish(&tp, dfops);
+ if (error)
+ goto out_cancel;
+
+ return xfs_trans_commit(tp);
+
+out_cancel:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
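The 15/16 scaling needs only a multiply and a shift: x * 15 >> 4 equals x * 15 / 16, i.e. 93.75% of x, so no 64-bit division is emitted on 32-bit builds. A self-contained check of the arithmetic:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t freeblks = 1000000;	/* free block count */
		uint64_t resblks = (freeblks * 15) >> 4;

		/* prints 937500, i.e. 93.75% of the input */
		printf("%llu\n", (unsigned long long)resblks);
		return 0;
	}

Note the sketch multiplies in 64 bits; the kernel hunk multiplies a uint and leans on the UINT_MAX clamp above it for the values it expects in practice.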
/*
* When this is called, all of the log intent items which did not have
* corresponding log done items should be in the AIL. What we do now
@@ -4825,10 +4878,12 @@ STATIC int
xlog_recover_process_intents(
struct xlog *log)
{
- struct xfs_log_item *lip;
- int error = 0;
+ struct xfs_defer_ops dfops;
struct xfs_ail_cursor cur;
+ struct xfs_log_item *lip;
struct xfs_ail *ailp;
+ xfs_fsblock_t firstfsb;
+ int error = 0;
#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
#endif
@@ -4839,6 +4894,7 @@ xlog_recover_process_intents(
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
#endif
+ xfs_defer_init(&dfops, &firstfsb);
while (lip != NULL) {
/*
* We're done when we see something other than an intent.
@@ -4859,6 +4915,12 @@ xlog_recover_process_intents(
*/
ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
+ /*
+ * NOTE: If your intent processing routine can create more
+ * deferred ops, you /must/ attach them to the dfops in this
+ * routine or else those subsequent intents will get
+ * replayed in the wrong order!
+ */
switch (lip->li_type) {
case XFS_LI_EFI:
error = xlog_recover_process_efi(log->l_mp, ailp, lip);
@@ -4867,10 +4929,12 @@ xlog_recover_process_intents(
error = xlog_recover_process_rui(log->l_mp, ailp, lip);
break;
case XFS_LI_CUI:
- error = xlog_recover_process_cui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_cui(log->l_mp, ailp, lip,
+ &dfops);
break;
case XFS_LI_BUI:
- error = xlog_recover_process_bui(log->l_mp, ailp, lip);
+ error = xlog_recover_process_bui(log->l_mp, ailp, lip,
+ &dfops);
break;
}
if (error)
@@ -4880,6 +4944,11 @@ xlog_recover_process_intents(
out:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
+ if (error)
+ xfs_defer_cancel(&dfops);
+ else
+ error = xlog_finish_defer_ops(log->l_mp, &dfops);
+
return error;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c879b517cc94..98fd41cbb9e1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -162,6 +162,7 @@ xfs_free_perag(
ASSERT(pag);
ASSERT(atomic_read(&pag->pag_ref) == 0);
xfs_buf_hash_destroy(pag);
+ mutex_destroy(&pag->pag_ici_reclaim_lock);
call_rcu(&pag->rcu_head, __xfs_free_perag);
}
}
@@ -248,6 +249,7 @@ xfs_initialize_perag(
out_hash_destroy:
xfs_buf_hash_destroy(pag);
out_free_pag:
+ mutex_destroy(&pag->pag_ici_reclaim_lock);
kmem_free(pag);
out_unwind_new_pags:
/* unwind any prior newly initialized pags */
@@ -256,6 +258,7 @@ out_unwind_new_pags:
if (!pag)
break;
xfs_buf_hash_destroy(pag);
+ mutex_destroy(&pag->pag_ici_reclaim_lock);
kmem_free(pag);
}
return error;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 010a13a201aa..5b848f4b637f 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,7 +48,7 @@
STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-
+STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
* We use the batch lookup interface to iterate over the dquots as it
@@ -162,7 +162,7 @@ xfs_qm_dqpurge(
*/
error = xfs_qm_dqflush(dqp, &bp);
if (error) {
- xfs_warn(mp, "%s: dquot %p flush failed",
+ xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed",
__func__, dqp);
} else {
error = xfs_bwrite(bp);
@@ -291,8 +291,7 @@ xfs_qm_dqattach_one(
* exist on disk and we didn't ask it to allocate; ESRCH if quotas got
* turned off suddenly.
*/
- error = xfs_qm_dqget(ip->i_mount, ip, id, type,
- doalloc | XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp);
if (error)
return error;
@@ -481,7 +480,7 @@ xfs_qm_dquot_isolate(
error = xfs_qm_dqflush(dqp, &bp);
if (error) {
- xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
+ xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed",
__func__, dqp);
goto out_unlock_dirty;
}
@@ -574,7 +573,7 @@ xfs_qm_set_defquota(
struct xfs_def_quota *defq;
int error;
- error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqread(mp, 0, type, 0, &dqp);
if (!error) {
xfs_disk_dquot_t *ddqp = &dqp->q_core;
@@ -652,7 +651,7 @@ xfs_qm_init_quotainfo(
XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
(XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
XFS_DQ_PROJ),
- XFS_QMOPT_DOWARN, &dqp);
+ 0, &dqp);
if (!error) {
xfs_disk_dquot_t *ddqp = &dqp->q_core;
@@ -695,9 +694,17 @@ xfs_qm_init_quotainfo(
qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
- register_shrinker(&qinf->qi_shrinker);
+
+ error = register_shrinker(&qinf->qi_shrinker);
+ if (error)
+ goto out_free_inos;
+
return 0;
+out_free_inos:
+ mutex_destroy(&qinf->qi_quotaofflock);
+ mutex_destroy(&qinf->qi_tree_lock);
+ xfs_qm_destroy_quotainos(qinf);
out_free_lru:
list_lru_destroy(&qinf->qi_lru);
out_free_qinf:
@@ -706,7 +713,6 @@ out_free_qinf:
return error;
}
-
/*
* Gets called when unmounting a filesystem or when all quotas get
* turned off.
@@ -723,19 +729,8 @@ xfs_qm_destroy_quotainfo(
unregister_shrinker(&qi->qi_shrinker);
list_lru_destroy(&qi->qi_lru);
-
- if (qi->qi_uquotaip) {
- IRELE(qi->qi_uquotaip);
- qi->qi_uquotaip = NULL; /* paranoia */
- }
- if (qi->qi_gquotaip) {
- IRELE(qi->qi_gquotaip);
- qi->qi_gquotaip = NULL;
- }
- if (qi->qi_pquotaip) {
- IRELE(qi->qi_pquotaip);
- qi->qi_pquotaip = NULL;
- }
+ xfs_qm_destroy_quotainos(qi);
+ mutex_destroy(&qi->qi_tree_lock);
mutex_destroy(&qi->qi_quotaofflock);
kmem_free(qi);
mp->m_quotainfo = NULL;
@@ -793,8 +788,8 @@ xfs_qm_qino_alloc(
return error;
if (need_alloc) {
- error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
- &committed);
+ error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
+ &committed);
if (error) {
xfs_trans_cancel(tp);
return error;
@@ -847,6 +842,7 @@ xfs_qm_reset_dqcounts(
{
struct xfs_dqblk *dqb;
int j;
+ xfs_failaddr_t fa;
trace_xfs_reset_dqcounts(bp, _RET_IP_);
@@ -868,10 +864,13 @@ xfs_qm_reset_dqcounts(
/*
* Do a sanity check, and if needed, repair the dqblk. Don't
* output any warnings because it's perfectly possible to
- * find uninitialised dquot blks. See comment in xfs_dqcheck.
+ * find uninitialised dquot blks. See comment in
+ * xfs_dquot_verify.
*/
- xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
- "xfs_quotacheck");
+ fa = xfs_dquot_verify(mp, ddq, id + j, type, 0);
+ if (fa)
+ xfs_dquot_repair(mp, ddq, id + j, type);
+
/*
* Reset type in case we are reusing group quota file for
* project quotas or vice versa
@@ -1078,8 +1077,7 @@ xfs_qm_quotacheck_dqadjust(
struct xfs_dquot *dqp;
int error;
- error = xfs_qm_dqget(mp, ip, id, type,
- XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+ error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp);
if (error) {
/*
* Shouldn't be able to turn off quotas here.
@@ -1600,6 +1598,24 @@ error_rele:
}
STATIC void
+xfs_qm_destroy_quotainos(
+ xfs_quotainfo_t *qi)
+{
+ if (qi->qi_uquotaip) {
+ IRELE(qi->qi_uquotaip);
+ qi->qi_uquotaip = NULL; /* paranoia */
+ }
+ if (qi->qi_gquotaip) {
+ IRELE(qi->qi_gquotaip);
+ qi->qi_gquotaip = NULL;
+ }
+ if (qi->qi_pquotaip) {
+ IRELE(qi->qi_pquotaip);
+ qi->qi_pquotaip = NULL;
+ }
+}
+
+STATIC void
xfs_qm_dqfree_one(
struct xfs_dquot *dqp)
{
@@ -1682,8 +1698,7 @@ xfs_qm_vop_dqalloc(
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, NULL, uid,
XFS_DQ_USER,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
+ XFS_QMOPT_DQALLOC,
&uq);
if (error) {
ASSERT(error != -ENOENT);
@@ -1709,8 +1724,7 @@ xfs_qm_vop_dqalloc(
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, NULL, gid,
XFS_DQ_GROUP,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
+ XFS_QMOPT_DQALLOC,
&gq);
if (error) {
ASSERT(error != -ENOENT);
@@ -1729,8 +1743,7 @@ xfs_qm_vop_dqalloc(
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
XFS_DQ_PROJ,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
+ XFS_QMOPT_DQALLOC,
&pq);
if (error) {
ASSERT(error != -ENOENT);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 8f2e2fac4255..3a55d6fc271b 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -393,7 +393,8 @@ xfs_cud_init(
int
xfs_cui_recover(
struct xfs_mount *mp,
- struct xfs_cui_log_item *cuip)
+ struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops)
{
int i;
int error = 0;
@@ -405,11 +406,9 @@ xfs_cui_recover(
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
enum xfs_refcount_intent_type type;
- xfs_fsblock_t firstfsb;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
struct xfs_bmbt_irec irec;
- struct xfs_defer_ops dfops;
bool requeue_only = false;
ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
@@ -465,7 +464,6 @@ xfs_cui_recover(
return error;
cudp = xfs_trans_get_cud(tp, cuip);
- xfs_defer_init(&dfops, &firstfsb);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
refc = &cuip->cui_format.cui_extents[i];
refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
@@ -485,7 +483,7 @@ xfs_cui_recover(
new_len = refc->pe_len;
} else
error = xfs_trans_log_finish_refcount_update(tp, cudp,
- &dfops, type, refc->pe_startblock, refc->pe_len,
+ dfops, type, refc->pe_startblock, refc->pe_len,
&new_fsb, &new_len, &rcur);
if (error)
goto abort_error;
@@ -497,21 +495,21 @@ xfs_cui_recover(
switch (type) {
case XFS_REFCOUNT_INCREASE:
error = xfs_refcount_increase_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_DECREASE:
error = xfs_refcount_decrease_extent(
- tp->t_mountp, &dfops, &irec);
+ tp->t_mountp, dfops, &irec);
break;
case XFS_REFCOUNT_ALLOC_COW:
error = xfs_refcount_alloc_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
error = xfs_refcount_free_cow_extent(
- tp->t_mountp, &dfops,
+ tp->t_mountp, dfops,
irec.br_startblock,
irec.br_blockcount);
break;
@@ -525,17 +523,12 @@ xfs_cui_recover(
}
xfs_refcount_finish_one_cleanup(tp, rcur, error);
- error = xfs_defer_finish(&tp, &dfops);
- if (error)
- goto abort_defer;
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
error = xfs_trans_commit(tp);
return error;
abort_error:
xfs_refcount_finish_one_cleanup(tp, rcur, error);
-abort_defer:
- xfs_defer_cancel(&dfops);
xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 5b74dddfa64b..0e5327349a13 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
-int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip);
+int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip,
+ struct xfs_defer_ops *dfops);
#endif /* __XFS_REFCOUNT_ITEM_H__ */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc041a29eb70..270246943a06 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
#include "xfs_alloc.h"
#include "xfs_quota_defs.h"
#include "xfs_quota.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include "xfs_iomap.h"
#include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ retry:
if (error)
goto out_bmap_cancel;
+ xfs_inode_set_cowblocks_tag(ip);
+
/* Finish up. */
error = xfs_defer_finish(&tp, &dfops);
if (error)
@@ -464,6 +464,13 @@ retry:
error = xfs_trans_commit(tp);
if (error)
return error;
+
+ /*
+ * Allocation succeeded but the requested range was not even partially
+ * satisfied? Bail out!
+ */
+ if (nimaps == 0)
+ return -ENOSPC;
convert:
return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
&dfops);
@@ -492,8 +499,9 @@ xfs_reflink_find_cow_mapping(
struct xfs_iext_cursor icur;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
- ASSERT(xfs_is_reflink_inode(ip));
+ if (!xfs_is_reflink_inode(ip))
+ return false;
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
return false;
@@ -598,10 +606,6 @@ xfs_reflink_cancel_cow_blocks(
del.br_startblock, del.br_blockcount,
NULL);
- /* Update quota accounting */
- xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
- -(long)del.br_blockcount);
-
/* Roll the transaction */
xfs_defer_ijoin(&dfops, ip);
error = xfs_defer_finish(tpp, &dfops);
@@ -612,6 +616,16 @@ xfs_reflink_cancel_cow_blocks(
/* Remove the mapping from the CoW fork. */
xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+
+ /* Remove the quota reservation */
+ error = xfs_trans_reserve_quota_nblks(NULL, ip,
+ -(long)del.br_blockcount, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ break;
+ } else {
+ /* Didn't do anything, push cursor back. */
+ xfs_iext_prev(ifp, &icur);
}
next_extent:
if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +741,7 @@ xfs_reflink_end_cow(
(unsigned int)(end_fsb - offset_fsb),
XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- resblks, 0, 0, &tp);
+ resblks, 0, XFS_TRANS_RESERVE, &tp);
if (error)
goto out;
@@ -791,6 +805,10 @@ xfs_reflink_end_cow(
if (error)
goto out_defer;
+ /* Charge this new data fork mapping to the on-disk quota. */
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
+ (long)del.br_blockcount);
+
/* Remove the mapping from the CoW fork. */
xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
@@ -940,7 +958,7 @@ xfs_reflink_set_inode_flag(
if (src->i_ino == dest->i_ino)
xfs_ilock(src, XFS_ILOCK_EXCL);
else
- xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL);
if (!xfs_is_reflink_inode(src)) {
trace_xfs_reflink_set_inode_flag(src);
@@ -1198,13 +1216,16 @@ xfs_reflink_remap_blocks(
/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
while (len) {
+ uint lock_mode;
+
trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
dest, destoff);
+
/* Read extent from the source file */
nimaps = 1;
- xfs_ilock(src, XFS_ILOCK_EXCL);
+ lock_mode = xfs_ilock_data_map_shared(src);
error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
- xfs_iunlock(src, XFS_ILOCK_EXCL);
+ xfs_iunlock(src, lock_mode);
if (error)
goto err;
ASSERT(nimaps == 1);
@@ -1241,6 +1262,50 @@ err:
}
/*
+ * Grab the exclusive iolock for a data copy from src to dest, making
+ * sure to abide by the vfs locking order (lowest pointer value goes first) and
+ * breaking the pnfs layout leases on dest before proceeding. The loop
+ * is needed because we cannot call the blocking break_layout() with the
+ * src iolock held, and therefore have to back out both locks.
+ */
+static int
+xfs_iolock_two_inodes_and_break_layout(
+ struct inode *src,
+ struct inode *dest)
+{
+ int error;
+
+retry:
+ if (src < dest) {
+ inode_lock_shared(src);
+ inode_lock_nested(dest, I_MUTEX_NONDIR2);
+ } else {
+ /* src >= dest */
+ inode_lock(dest);
+ }
+
+ error = break_layout(dest, false);
+ if (error == -EWOULDBLOCK) {
+ inode_unlock(dest);
+ if (src < dest)
+ inode_unlock_shared(src);
+ error = break_layout(dest, true);
+ if (error)
+ return error;
+ goto retry;
+ }
+ if (error) {
+ inode_unlock(dest);
+ if (src < dest)
+ inode_unlock_shared(src);
+ return error;
+ }
+ if (src > dest)
+ inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+ return 0;
+}
+
+/*
* Link a range of blocks from one file to another.
*/
int
@@ -1270,11 +1335,14 @@ xfs_reflink_remap_range(
return -EIO;
/* Lock both files against IO */
- lock_two_nondirectories(inode_in, inode_out);
+ ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
+ if (ret)
+ return ret;
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else
- xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
+ xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+ XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */
ret = -EINVAL;
@@ -1291,8 +1359,24 @@ xfs_reflink_remap_range(
if (ret <= 0)
goto out_unlock;
+ /* Attach dquots to dest inode before changing block map */
+ ret = xfs_qm_dqattach(dest, 0);
+ if (ret)
+ goto out_unlock;
+
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+ /*
+ * Clear out post-eof preallocations because we don't have page cache
+ * backing the delayed allocations and they'll never get freed on
+ * their own.
+ */
+ if (xfs_can_free_eofblocks(dest, true)) {
+ ret = xfs_free_eofblocks(dest);
+ if (ret)
+ goto out_unlock;
+ }
+
/* Set flags and remap blocks. */
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
@@ -1326,10 +1410,12 @@ xfs_reflink_remap_range(
is_dedupe);
out_unlock:
- xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+ xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+ if (!same_inode)
+ xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+ inode_unlock(inode_out);
if (!same_inode)
- xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
- unlock_two_nondirectories(inode_in, inode_out);
+ inode_unlock_shared(inode_in);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
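
The new xfs_iolock_two_inodes_and_break_layout() helper above encodes a classic lock/try/back-off/retry shape: take the locks in address order, attempt a non-blocking lease break, and if that would block, drop everything before waiting and start over. A compilable user-space model of just that shape, with toy stand-ins for the inode locks and for break_layout() (none of these names are kernel APIs, and the unconditional-error unlock path is trimmed since the toy break can only succeed or would-block):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

struct toy_inode {
	pthread_mutex_t	lock;
	int		lease_holders;
};

/* Non-blocking when !wait: report EWOULDBLOCK instead of sleeping. */
static int toy_break_layout(struct toy_inode *ip, int wait)
{
	if (ip->lease_holders == 0)
		return 0;
	if (!wait)
		return -EWOULDBLOCK;
	ip->lease_holders = 0;	/* pretend we slept until the lease was recalled */
	return 0;
}

static int toy_lock_two_and_break(struct toy_inode *src, struct toy_inode *dest)
{
	int error;

retry:
	/* Lowest address locks first; src == dest takes only one lock. */
	if (src < dest)
		pthread_mutex_lock(&src->lock);
	pthread_mutex_lock(&dest->lock);

	error = toy_break_layout(dest, 0);
	if (error == -EWOULDBLOCK) {
		/* Drop every lock before the blocking wait, then retry. */
		pthread_mutex_unlock(&dest->lock);
		if (src < dest)
			pthread_mutex_unlock(&src->lock);
		error = toy_break_layout(dest, 1);
		if (error)
			return error;
		goto retry;
	}
	if (src > dest)
		pthread_mutex_lock(&src->lock);
	return error;
}

int main(void)
{
	struct toy_inode a = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct toy_inode b = { PTHREAD_MUTEX_INITIALIZER, 1 };

	printf("result: %d\n", toy_lock_two_and_break(&a, &b));
	return 0;
}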
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 3f30f846d7f2..dfee3c991155 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -139,6 +139,9 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv);
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_extlen_t len,
+ bool *is_free);
#else
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
@@ -148,6 +151,7 @@ bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
# define xfs_verify_rtbno(m, r) (false)
+# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
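
xfs_rtalloc_extent_is_free() is exported here so other code, presumably the online scrub added elsewhere in this series, can cross-reference mappings against the realtime bitmap. A sketch of how a caller might use the declared prototype; the caller itself is hypothetical, only the xfs_rtalloc_extent_is_free() signature comes from this patch:

/* Hypothetical caller: a mapped rt extent must not be marked free. */
static int toy_check_rt_extent(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_rtblock_t		rtbno,
	xfs_extlen_t		len)
{
	bool			is_free;
	int			error;

	error = xfs_rtalloc_extent_is_free(mp, tp, rtbno, len, &is_free);
	if (error)
		return error;
	if (is_free)
		return -EFSCORRUPTED;	/* mapped but free: cross-ref failed */
	return 0;
}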
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f663022353c0..f3e0001f9992 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -212,9 +212,9 @@ xfs_parseargs(
*/
if (sb_rdonly(sb))
mp->m_flags |= XFS_MOUNT_RDONLY;
- if (sb->s_flags & MS_DIRSYNC)
+ if (sb->s_flags & SB_DIRSYNC)
mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
+ if (sb->s_flags & SB_SYNCHRONOUS)
mp->m_flags |= XFS_MOUNT_WSYNC;
/*
@@ -1153,6 +1153,14 @@ xfs_fs_statfs(
((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
(XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
xfs_qm_statvfs(ip, statp);
+
+ if (XFS_IS_REALTIME_MOUNT(mp) &&
+ (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
+ statp->f_blocks = sbp->sb_rblocks;
+ statp->f_bavail = statp->f_bfree =
+ sbp->sb_frextents * sbp->sb_rextsize;
+ }
+
return 0;
}
@@ -1312,7 +1320,7 @@ xfs_fs_remount(
}
/* ro -> rw */
- if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
xfs_warn(mp,
"ro->rw transition prohibited on norecovery mount");
@@ -1360,6 +1368,7 @@ xfs_fs_remount(
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
}
+ xfs_queue_cowblocks(mp);
/* Create the per-AG metadata reservation pool. */
error = xfs_fs_reserve_ag_blocks(mp);
@@ -1368,7 +1377,15 @@ xfs_fs_remount(
}
/* rw -> ro */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+ /* Get rid of any leftover CoW reservations... */
+ cancel_delayed_work_sync(&mp->m_cowblocks_work);
+ error = xfs_icache_free_cowblocks(mp, NULL);
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+ }
+
/* Free the per-AG metadata reservation pool. */
error = xfs_fs_unreserve_ag_blocks(mp);
if (error) {
@@ -1651,7 +1668,7 @@ xfs_fs_fill_super(
}
if (xfs_sb_version_hasreflink(&mp->m_sb))
xfs_alert(mp,
- "DAX and reflink have not been tested together!");
+ "DAX and reflink cannot be used together!");
}
if (mp->m_flags & XFS_MOUNT_DISCARD) {
@@ -1675,10 +1692,6 @@ xfs_fs_fill_super(
"EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
}
- if (xfs_sb_version_hasreflink(&mp->m_sb))
- xfs_alert(mp,
- "EXPERIMENTAL reflink feature enabled. Use at your own risk!");
-
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
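
The statfs hunk above reports realtime geometry for realtime files: sb_frextents counts free rt extents, each sb_rextsize filesystem blocks long, so the free-block figure is their product. A standalone arithmetic check of that conversion, with made-up geometry:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up geometry, standing in for the superblock fields. */
	uint64_t sb_rblocks   = 1048576;	/* rt device size in blocks */
	uint64_t sb_frextents = 2048;		/* free rt extents */
	uint64_t sb_rextsize  = 16;		/* blocks per rt extent */

	uint64_t f_blocks = sb_rblocks;
	uint64_t f_bfree  = sb_frextents * sb_rextsize;	/* 32768 blocks */

	printf("f_blocks=%llu f_bfree=%llu\n",
	       (unsigned long long)f_blocks,
	       (unsigned long long)f_bfree);
	return 0;
}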
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 5f2f32408011..fcc5dfc70aa0 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -30,7 +30,7 @@ extern void xfs_qm_exit(void);
#ifdef CONFIG_XFS_POSIX_ACL
# define XFS_ACL_STRING "ACLs, "
-# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL)
+# define set_posix_acl_flag(sb) ((sb)->s_flags |= SB_POSIXACL)
#else
# define XFS_ACL_STRING
# define set_posix_acl_flag(sb) do { } while (0)
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 68d3ca2c4968..2e9e793a8f9d 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -232,11 +232,6 @@ xfs_symlink(
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
- if (error == -ENOSPC && fs_blocks == 0) {
- resblks = 0;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
- &tp);
- }
if (error)
goto out_release_inode;
@@ -260,14 +255,6 @@ xfs_symlink(
goto out_trans_cancel;
/*
- * Check for ability to enter directory entry, if no space reserved.
- */
- if (!resblks) {
- error = xfs_dir_canenter(tp, dp, link_name);
- if (error)
- goto out_trans_cancel;
- }
- /*
* Initialize the bmap freelist prior to calling either
* bmapi or the directory create code.
*/
@@ -277,7 +264,7 @@ xfs_symlink(
* Allocate an inode for the symlink.
*/
error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
- prid, resblks > 0, &ip, NULL);
+ prid, &ip, NULL);
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 5d95fe348294..35f3546b6af5 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -24,7 +24,6 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_da_btree.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d718a10c2271..945de08af7ba 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -72,7 +72,7 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
__entry->flags = ctx->flags;
),
TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
- "alist 0x%p size %u count %u firstu %u flags %d %s",
+ "alist %p size %u count %u firstu %u flags %d %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->hashval,
@@ -119,7 +119,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
__entry->refcount = refcount;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d agno %u refcount %d caller %ps",
+ TP_printk("dev %d:%d agno %u refcount %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->refcount,
@@ -200,7 +200,7 @@ TRACE_EVENT(xfs_attr_list_node_descend,
__entry->bt_before = be32_to_cpu(btree->before);
),
TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
- "alist 0x%p size %u count %u firstu %u flags %d %s "
+ "alist %p size %u count %u firstu %u flags %d %s "
"node hashval %u, node before %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
@@ -251,8 +251,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__entry->bmap_state = state;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d "
- "offset %lld block %lld count %lld flag %d caller %ps",
+ TP_printk("dev %d:%d ino 0x%llx state %s cur %p/%d "
+ "offset %lld block %lld count %lld flag %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -301,7 +301,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
- "lock %d flags %s caller %ps",
+ "lock %d flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->bno,
__entry->nblks,
@@ -370,7 +370,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d flags %s caller %ps",
+ "lock %d flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->bno,
__entry->buffer_length,
@@ -390,7 +390,7 @@ DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
TRACE_EVENT(xfs_buf_ioerror,
- TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+ TP_PROTO(struct xfs_buf *bp, int error, xfs_failaddr_t caller_ip),
TP_ARGS(bp, error, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -401,7 +401,7 @@ TRACE_EVENT(xfs_buf_ioerror,
__field(int, pincount)
__field(unsigned, lockval)
__field(int, error)
- __field(unsigned long, caller_ip)
+ __field(xfs_failaddr_t, caller_ip)
),
TP_fast_assign(
__entry->dev = bp->b_target->bt_dev;
@@ -415,7 +415,7 @@ TRACE_EVENT(xfs_buf_ioerror,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d error %d flags %s caller %ps",
+ "lock %d error %d flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->bno,
__entry->buffer_length,
@@ -460,7 +460,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
),
TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
"lock %d flags %s recur %d refcount %d bliflags %s "
- "lidesc 0x%p liflags %s",
+ "lidesc %p liflags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->buf_bno,
__entry->buf_len,
@@ -579,7 +579,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
__entry->lock_flags = lock_flags;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
+ TP_printk("dev %d:%d ino 0x%llx flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -697,7 +697,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
+ TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
@@ -1028,7 +1028,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
__entry->flags = lip->li_flags;
__entry->lsn = lip->li_lsn;
),
- TP_printk("dev %d:%d lip 0x%p lsn %d/%d type %s flags %s",
+ TP_printk("dev %d:%d lip %p lsn %d/%d type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->lip,
CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
@@ -1049,7 +1049,7 @@ TRACE_EVENT(xfs_log_force,
__entry->lsn = lsn;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d lsn 0x%llx caller %ps",
+ TP_printk("dev %d:%d lsn 0x%llx caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->lsn, (void *)__entry->caller_ip)
)
@@ -1082,7 +1082,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class,
__entry->old_lsn = old_lsn;
__entry->new_lsn = new_lsn;
),
- TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s",
+ TP_printk("dev %d:%d lip %p old lsn %d/%d new lsn %d/%d type %s flags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->lip,
CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn),
@@ -1403,7 +1403,7 @@ TRACE_EVENT(xfs_bunmap,
__entry->flags = flags;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
- "flags %s caller %ps",
+ "flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
@@ -1517,7 +1517,7 @@ TRACE_EVENT(xfs_agf,
),
TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
"levels b %u c %u flfirst %u fllast %u flcount %u "
- "freeblks %u longest %u caller %ps",
+ "freeblks %u longest %u caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
@@ -2014,7 +2014,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
__entry->count = item->ri_cnt;
__entry->total = item->ri_total;
),
- TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, "
+ TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item %p, "
"item type %s item region count/total %d/%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tid,
@@ -2486,7 +2486,7 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
__entry->error = error;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d agno %u error %d caller %ps",
+ TP_printk("dev %d:%d agno %u error %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->error,
@@ -2977,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_inode_error_class,
__entry->error = error;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino %llx error %d caller %ps",
+ TP_printk("dev %d:%d ino %llx error %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->error,
@@ -3313,6 +3313,32 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
+TRACE_EVENT(xfs_trans_resv_calc,
+ TP_PROTO(struct xfs_mount *mp, unsigned int type,
+ struct xfs_trans_res *res),
+ TP_ARGS(mp, type, res),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, type)
+ __field(uint, logres)
+ __field(int, logcount)
+ __field(int, logflags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->type = type;
+ __entry->logres = res->tr_logres;
+ __entry->logcount = res->tr_logcount;
+ __entry->logflags = res->tr_logflags;
+ ),
+ TP_printk("dev %d:%d type %d logres %u logcount %d flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->logres,
+ __entry->logcount,
+ __entry->logflags)
+);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a87f657f59c9..86f92df32c42 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -35,6 +35,27 @@
kmem_zone_t *xfs_trans_zone;
kmem_zone_t *xfs_log_item_desc_zone;
+#if defined(CONFIG_TRACEPOINTS)
+static void
+xfs_trans_trace_reservations(
+ struct xfs_mount *mp)
+{
+ struct xfs_trans_res resv;
+ struct xfs_trans_res *res;
+ struct xfs_trans_res *end_res;
+ int i;
+
+ res = (struct xfs_trans_res *)M_RES(mp);
+ end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
+ for (i = 0; res < end_res; i++, res++)
+ trace_xfs_trans_resv_calc(mp, i, res);
+ xfs_log_get_max_trans_res(mp, &resv);
+ trace_xfs_trans_resv_calc(mp, -1, &resv);
+}
+#else
+# define xfs_trans_trace_reservations(mp)
+#endif
+
/*
* Initialize the precomputed transaction reservation values
* in the mount structure.
@@ -44,6 +65,7 @@ xfs_trans_init(
struct xfs_mount *mp)
{
xfs_trans_resv_calc(mp, M_RES(mp));
+ xfs_trans_trace_reservations(mp);
}
/*
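
xfs_trans_trace_reservations() above walks struct xfs_trans_resv as if it were an array of struct xfs_trans_res, which only holds together because the struct contains nothing but members of that one type. A standalone model of the idiom, with toy types; it relies on the members being laid out like an array, which is why the cast deserves a comment:

#include <stdio.h>

struct toy_res {
	unsigned int	logres;
	int		logcount;
};

/* Nothing but toy_res members, so it can be walked like an array. */
struct toy_resv {
	struct toy_res	tr_write;
	struct toy_res	tr_itruncate;
	struct toy_res	tr_rename;
};

int main(void)
{
	struct toy_resv	rv = { { 1024, 2 }, { 2048, 3 }, { 512, 1 } };
	struct toy_res	*res = (struct toy_res *)&rv;
	struct toy_res	*end_res = (struct toy_res *)(&rv + 1);
	int		i;

	for (i = 0; res < end_res; i++, res++)
		printf("type %d logres %u logcount %d\n",
		       i, res->logres, res->logcount);
	return 0;
}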
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 815b53d20e26..9d542dfe0052 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -50,7 +50,7 @@ typedef struct xfs_log_item {
uint li_type; /* item type */
uint li_flags; /* misc flags */
struct xfs_buf *li_buf; /* real buffer pointer */
- struct xfs_log_item *li_bio_list; /* buffer item list */
+ struct list_head li_bio_list; /* buffer item list */
void (*li_cb)(struct xfs_buf *,
struct xfs_log_item *);
/* buffer item iodone */
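
Turning li_bio_list into a list_head trades a hand-rolled singly linked chain for the standard doubly linked list API. A sketch of what iterating a buffer's attached log items might look like after the conversion; the buffer-side list head (written b_li_list here) is an assumption, not something this hunk shows:

/* Sketch: run each attached item's iodone callback, list_head style. */
static void toy_run_callbacks(struct xfs_buf *bp)
{
	struct xfs_log_item	*lip, *next;

	list_for_each_entry_safe(lip, next, &bp->b_li_list, li_bio_list) {
		if (lip->li_cb)
			lip->li_cb(bp, lip);
	}
}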
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 3ba7a96a8abd..653ce379d36b 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -82,12 +82,12 @@ _xfs_trans_bjoin(
ASSERT(bp->b_transp == NULL);
/*
- * The xfs_buf_log_item pointer is stored in b_fsprivate. If
+ * The xfs_buf_log_item pointer is stored in b_log_item. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -118,7 +118,7 @@ xfs_trans_bjoin(
struct xfs_buf *bp)
{
_xfs_trans_bjoin(tp, bp, 0);
- trace_xfs_trans_bjoin(bp->b_fspriv);
+ trace_xfs_trans_bjoin(bp->b_log_item);
}
/*
@@ -139,7 +139,7 @@ xfs_trans_get_buf_map(
xfs_buf_flags_t flags)
{
xfs_buf_t *bp;
- xfs_buf_log_item_t *bip;
+ struct xfs_buf_log_item *bip;
if (!tp)
return xfs_buf_get_map(target, map, nmaps, flags);
@@ -159,7 +159,7 @@ xfs_trans_get_buf_map(
}
ASSERT(bp->b_transp == tp);
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -175,7 +175,7 @@ xfs_trans_get_buf_map(
ASSERT(!bp->b_error);
_xfs_trans_bjoin(tp, bp, 1);
- trace_xfs_trans_get_buf(bp->b_fspriv);
+ trace_xfs_trans_get_buf(bp->b_log_item);
return bp;
}
@@ -188,12 +188,13 @@ xfs_trans_get_buf_map(
* mount structure.
*/
xfs_buf_t *
-xfs_trans_getsb(xfs_trans_t *tp,
- struct xfs_mount *mp,
- int flags)
+xfs_trans_getsb(
+ xfs_trans_t *tp,
+ struct xfs_mount *mp,
+ int flags)
{
xfs_buf_t *bp;
- xfs_buf_log_item_t *bip;
+ struct xfs_buf_log_item *bip;
/*
* Default to just trying to lock the superblock buffer
@@ -210,7 +211,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
*/
bp = mp->m_sb_bp;
if (bp->b_transp == tp) {
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -223,7 +224,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
return NULL;
_xfs_trans_bjoin(tp, bp, 1);
- trace_xfs_trans_getsb(bp->b_fspriv);
+ trace_xfs_trans_getsb(bp->b_log_item);
return bp;
}
@@ -266,7 +267,7 @@ xfs_trans_read_buf_map(
if (bp) {
ASSERT(xfs_buf_islocked(bp));
ASSERT(bp->b_transp == tp);
- ASSERT(bp->b_fspriv != NULL);
+ ASSERT(bp->b_log_item != NULL);
ASSERT(!bp->b_error);
ASSERT(bp->b_flags & XBF_DONE);
@@ -279,7 +280,7 @@ xfs_trans_read_buf_map(
return -EIO;
}
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
bip->bli_recur++;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -329,7 +330,7 @@ xfs_trans_read_buf_map(
if (tp) {
_xfs_trans_bjoin(tp, bp, 1);
- trace_xfs_trans_read_buf(bp->b_fspriv);
+ trace_xfs_trans_read_buf(bp->b_log_item);
}
*bpp = bp;
return 0;
@@ -352,10 +353,11 @@ xfs_trans_read_buf_map(
* brelse() call.
*/
void
-xfs_trans_brelse(xfs_trans_t *tp,
- xfs_buf_t *bp)
+xfs_trans_brelse(
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ struct xfs_buf_log_item *bip;
int freed;
/*
@@ -368,7 +370,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
}
ASSERT(bp->b_transp == tp);
- bip = bp->b_fspriv;
+ bip = bp->b_log_item;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -456,10 +458,11 @@ xfs_trans_brelse(xfs_trans_t *tp,
*/
/* ARGSUSED */
void
-xfs_trans_bhold(xfs_trans_t *tp,
- xfs_buf_t *bp)
+xfs_trans_bhold(
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -476,10 +479,11 @@ xfs_trans_bhold(xfs_trans_t *tp,
* for this transaction.
*/
void
-xfs_trans_bhold_release(xfs_trans_t *tp,
- xfs_buf_t *bp)
+xfs_trans_bhold_release(
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -500,7 +504,7 @@ xfs_trans_dirty_buf(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -557,7 +561,7 @@ xfs_trans_log_buf(
uint first,
uint last)
{
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(first <= last && last < BBTOB(bp->b_length));
ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
@@ -600,10 +604,10 @@ xfs_trans_log_buf(
*/
void
xfs_trans_binval(
- xfs_trans_t *tp,
- xfs_buf_t *bp)
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
int i;
ASSERT(bp->b_transp == tp);
@@ -655,10 +659,10 @@ xfs_trans_binval(
*/
void
xfs_trans_inode_buf(
- xfs_trans_t *tp,
- xfs_buf_t *bp)
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -679,10 +683,10 @@ xfs_trans_inode_buf(
*/
void
xfs_trans_stale_inode_buf(
- xfs_trans_t *tp,
- xfs_buf_t *bp)
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -704,10 +708,10 @@ xfs_trans_stale_inode_buf(
/* ARGSUSED */
void
xfs_trans_inode_alloc_buf(
- xfs_trans_t *tp,
- xfs_buf_t *bp)
+ xfs_trans_t *tp,
+ xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -729,7 +733,7 @@ xfs_trans_ordered_buf(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(bp->b_transp == tp);
ASSERT(bip != NULL);
@@ -759,7 +763,7 @@ xfs_trans_buf_set_type(
struct xfs_buf *bp,
enum xfs_blft type)
{
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
if (!tp)
return;
@@ -776,8 +780,8 @@ xfs_trans_buf_copy_type(
struct xfs_buf *dst_bp,
struct xfs_buf *src_bp)
{
- struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
- struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
+ struct xfs_buf_log_item *sbip = src_bp->b_log_item;
+ struct xfs_buf_log_item *dbip = dst_bp->b_log_item;
enum xfs_blft type;
type = xfs_blft_from_flags(&sbip->__bli_format);
@@ -797,11 +801,11 @@ xfs_trans_buf_copy_type(
/* ARGSUSED */
void
xfs_trans_dquot_buf(
- xfs_trans_t *tp,
- xfs_buf_t *bp,
- uint type)
+ xfs_trans_t *tp,
+ xfs_buf_t *bp,
+ uint type)
{
- struct xfs_buf_log_item *bip = bp->b_fspriv;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
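
The bulk of the xfs_trans_buf.c changes are mechanical: every bp->b_fspriv read becomes bp->b_log_item, swapping an untyped void pointer for a dedicated, compiler-checked field. A standalone before/after toy showing what the typed field buys (toy types, not the kernel structures):

#include <stdio.h>

struct toy_log_item {
	int	refcount;
};

struct toy_buf {
	void			*b_fspriv;	/* old: untyped, cast at each use */
	struct toy_log_item	*b_log_item;	/* new: dedicated, typed */
};

int main(void)
{
	struct toy_log_item	li = { 1 };
	struct toy_buf		bp = { &li, &li };

	/* Before: the compiler cannot check what b_fspriv points at. */
	struct toy_log_item	*old_way = (struct toy_log_item *)bp.b_fspriv;
	/* After: assigning b_log_item to the wrong type is a compile error. */
	struct toy_log_item	*new_way = bp.b_log_item;

	printf("%d %d\n", old_way->refcount, new_way->refcount);
	return 0;
}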
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index daa7615497f9..4a89da4b6fe7 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -28,6 +28,8 @@
#include "xfs_inode_item.h"
#include "xfs_trace.h"
+#include <linux/iversion.h>
+
/*
* Add a locked inode to the transaction.
*
@@ -110,15 +112,17 @@ xfs_trans_log_inode(
/*
* First time we log the inode in a transaction, bump the inode change
- * counter if it is configured for this to occur. We don't use
- * inode_inc_version() because there is no need for extra locking around
- * i_version as we already hold the inode locked exclusively for
- * metadata modification.
+ * counter if it is configured for this to occur. While we have the
+ * inode locked exclusively for metadata modification, we can usually
+ * avoid setting XFS_ILOG_CORE if no one has queried the value since
+ * the last time it was incremented. If we have XFS_ILOG_CORE already
+ * set however, then go ahead and bump the i_version counter
+ * unconditionally.
*/
if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
IS_I_VERSION(VFS_I(ip))) {
- VFS_I(ip)->i_version++;
- flags |= XFS_ILOG_CORE;
+ if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE))
+ flags |= XFS_ILOG_CORE;
}
tp->t_flags |= XFS_TRANS_DIRTY;