summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Kconfig12
-rw-r--r--fs/xfs/libxfs/xfs_ag.c45
-rw-r--r--fs/xfs/libxfs/xfs_ag.h3
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c87
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h24
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c251
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h5
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c8
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.c4
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.h6
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c27
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c31
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h7
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c24
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c47
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h22
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c129
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h4
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c9
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c10
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h83
-rw-r--r--fs/xfs/libxfs/xfs_sb.c27
-rw-r--r--fs/xfs/libxfs/xfs_sb.h2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c28
-rw-r--r--fs/xfs/libxfs/xfs_types.h13
-rw-r--r--fs/xfs/scrub/attr.c5
-rw-r--r--fs/xfs/scrub/bmap.c8
-rw-r--r--fs/xfs/scrub/repair.c3
-rw-r--r--fs/xfs/scrub/rtbitmap.c3
-rw-r--r--fs/xfs/xfs.h4
-rw-r--r--fs/xfs/xfs_aops.c54
-rw-r--r--fs/xfs/xfs_attr_item.c104
-rw-r--r--fs/xfs/xfs_bmap_item.c85
-rw-r--r--fs/xfs/xfs_bmap_util.c83
-rw-r--r--fs/xfs/xfs_bmap_util.h2
-rw-r--r--fs/xfs/xfs_buf.c44
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_buf_item.c32
-rw-r--r--fs/xfs/xfs_dquot.c1
-rw-r--r--fs/xfs/xfs_dquot_item.c31
-rw-r--r--fs/xfs/xfs_extfree_item.c144
-rw-r--r--fs/xfs/xfs_file.c33
-rw-r--r--fs/xfs/xfs_file.h15
-rw-r--r--fs/xfs/xfs_fsmap.c268
-rw-r--r--fs/xfs/xfs_fsops.c5
-rw-r--r--fs/xfs/xfs_icache.c2
-rw-r--r--fs/xfs/xfs_inode.c43
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c35
-rw-r--r--fs/xfs/xfs_ioctl.c12
-rw-r--r--fs/xfs/xfs_iomap.c81
-rw-r--r--fs/xfs/xfs_iops.c1
-rw-r--r--fs/xfs/xfs_iops.h3
-rw-r--r--fs/xfs/xfs_refcount_item.c68
-rw-r--r--fs/xfs/xfs_reflink.c27
-rw-r--r--fs/xfs/xfs_rmap_item.c6
-rw-r--r--fs/xfs/xfs_rtalloc.c90
-rw-r--r--fs/xfs/xfs_rtalloc.h73
-rw-r--r--fs/xfs/xfs_symlink.c8
-rw-r--r--fs/xfs/xfs_trace.h40
66 files changed, 1474 insertions, 869 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 9fac5ea8d0e4..dff90db507e3 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -154,6 +154,18 @@ config XFS_DEBUG
Say N unless you are an XFS developer, or you play one on TV.
+config XFS_DEBUG_EXPENSIVE
+ bool "XFS expensive debugging checks"
+ depends on XFS_FS && XFS_DEBUG
+ help
+ Say Y here to get an XFS build with expensive debugging checks
+ enabled. These checks may affect performance significantly.
+
+ Note that the resulting code will be HUGER and SLOWER, and probably
+ not useful unless you are debugging a particular problem.
+
+ Say N unless you are an XFS developer, or you play one on TV.
+
config XFS_ASSERT_FATAL
bool "XFS fatal asserts"
default y
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index bf47efe08a58..9743fa5b5388 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -259,6 +259,30 @@ xfs_agino_range(
return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}
+/*
+ * Free the perags within the specified AG range. This is only used to free
+ * unused perags under the error handling path.
+ */
+void
+xfs_free_unused_perag_range(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agstart,
+ xfs_agnumber_t agend)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t index;
+
+ for (index = agstart; index < agend; index++) {
+ spin_lock(&mp->m_perag_lock);
+ pag = radix_tree_delete(&mp->m_perag_tree, index);
+ spin_unlock(&mp->m_perag_lock);
+ if (!pag)
+ break;
+ xfs_buf_hash_destroy(pag);
+ kmem_free(pag);
+ }
+}
+
int
xfs_initialize_perag(
struct xfs_mount *mp,
@@ -345,18 +369,14 @@ xfs_initialize_perag(
return 0;
out_remove_pag:
+ spin_lock(&mp->m_perag_lock);
radix_tree_delete(&mp->m_perag_tree, index);
+ spin_unlock(&mp->m_perag_lock);
out_free_pag:
kmem_free(pag);
out_unwind_new_pags:
/* unwind any prior newly initialized pags */
- for (index = first_initialised; index < agcount; index++) {
- pag = radix_tree_delete(&mp->m_perag_tree, index);
- if (!pag)
- break;
- xfs_buf_hash_destroy(pag);
- kmem_free(pag);
- }
+ xfs_free_unused_perag_range(mp, first_initialised, agcount);
return error;
}
@@ -906,7 +926,10 @@ xfs_ag_shrink_space(
if (err2 != -ENOSPC)
goto resv_err;
- __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
+ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
+ XFS_AG_RESV_NONE, true);
+ if (err2)
+ goto resv_err;
/*
* Roll the transaction before trying to re-init the per-ag
@@ -981,10 +1004,8 @@ xfs_ag_extend_space(
if (error)
return error;
- error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno,
- be32_to_cpu(agf->agf_length) - len),
- len, &XFS_RMAP_OINFO_SKIP_UPDATE,
- XFS_AG_RESV_NONE);
+ error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len,
+ len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 191b22b9a35b..eb84af1c8628 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -106,6 +106,9 @@ struct xfs_perag {
#endif /* __KERNEL__ */
};
+
+void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
+ xfs_agnumber_t agend);
int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 74d039bdc9f7..cd5b197d7046 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2485,45 +2485,53 @@ xfs_agfl_reset(
* the real allocation can proceed. Deferring the free disconnects freeing up
* the AGFL slot from freeing the block.
*/
-STATIC void
+static int
xfs_defer_agfl_block(
struct xfs_trans *tp,
xfs_agnumber_t agno,
- xfs_fsblock_t agbno,
+ xfs_agblock_t agbno,
struct xfs_owner_info *oinfo)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent_free_item *new; /* new element */
+ struct xfs_extent_free_item *xefi;
+ xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
ASSERT(xfs_extfree_item_cache != NULL);
ASSERT(oinfo != NULL);
- new = kmem_cache_zalloc(xfs_extfree_item_cache,
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
+ return -EFSCORRUPTED;
+
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
- new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
- new->xefi_blockcount = 1;
- new->xefi_owner = oinfo->oi_owner;
+ xefi->xefi_startblock = fsbno;
+ xefi->xefi_blockcount = 1;
+ xefi->xefi_owner = oinfo->oi_owner;
+ xefi->xefi_agresv = XFS_AG_RESV_AGFL;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
+ xfs_extent_free_get_group(mp, xefi);
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+ return 0;
}
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
-void
+int
__xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type,
bool skip_discard)
{
- struct xfs_extent_free_item *new; /* new element */
-#ifdef DEBUG
+ struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp;
+#ifdef DEBUG
xfs_agnumber_t agno;
xfs_agblock_t agbno;
@@ -2539,28 +2547,36 @@ __xfs_free_extent_later(
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ ASSERT(type != XFS_AG_RESV_AGFL);
- new = kmem_cache_zalloc(xfs_extfree_item_cache,
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
- new->xefi_startblock = bno;
- new->xefi_blockcount = (xfs_extlen_t)len;
+ xefi->xefi_startblock = bno;
+ xefi->xefi_blockcount = (xfs_extlen_t)len;
+ xefi->xefi_agresv = type;
if (skip_discard)
- new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
- new->xefi_flags |= XFS_EFI_ATTR_FORK;
+ xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
- new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
- new->xefi_owner = oinfo->oi_owner;
+ xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
+ xefi->xefi_owner = oinfo->oi_owner;
} else {
- new->xefi_owner = XFS_RMAP_OWN_NULL;
+ xefi->xefi_owner = XFS_RMAP_OWN_NULL;
}
- trace_xfs_bmap_free_defer(tp->t_mountp,
+ trace_xfs_bmap_free_defer(mp,
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
+
+ xfs_extent_free_get_group(mp, xefi);
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ return 0;
}
#ifdef DEBUG
@@ -2720,7 +2736,9 @@ xfs_alloc_fix_freelist(
goto out_agbp_relse;
/* defer agfl frees */
- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ if (error)
+ goto out_agbp_relse;
}
targs.tp = tp;
@@ -3447,7 +3465,8 @@ xfs_free_extent_fix_freelist(
int
__xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
@@ -3455,12 +3474,9 @@ __xfs_free_extent(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
struct xfs_agf *agf;
int error;
unsigned int busy_flags = 0;
- struct xfs_perag *pag;
ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
@@ -3469,10 +3485,9 @@ __xfs_free_extent(
XFS_ERRTAG_FREE_EXTENT))
return -EIO;
- pag = xfs_perag_get(mp, agno);
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error)
- goto err;
+ return error;
agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
@@ -3486,20 +3501,18 @@ __xfs_free_extent(
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
+ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
+ type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
- xfs_perag_put(pag);
return 0;
err_release:
xfs_trans_brelse(tp, agbp);
-err:
- xfs_perag_put(pag);
return error;
}
@@ -3532,15 +3545,11 @@ xfs_alloc_query_range(
xfs_alloc_query_range_fn fn,
void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
- struct xfs_alloc_query_range_info query;
+ union xfs_btree_irec low_brec = { .a = *low_rec };
+ union xfs_btree_irec high_brec = { .a = *high_rec };
+ struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
- low_brec.a = *low_rec;
- high_brec.a = *high_rec;
- query.priv = priv;
- query.fn = fn;
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_alloc_query_range_helper, &query);
}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 2c3f762dfb58..2dd93d62150f 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -130,7 +130,8 @@ xfs_alloc_vextent(
int /* error */
__xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t bno, /* starting block number of extent */
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len, /* length of extent */
const struct xfs_owner_info *oinfo, /* extent owner */
enum xfs_ag_resv_type type, /* block reservation type */
@@ -139,12 +140,13 @@ __xfs_free_extent(
static inline int
xfs_free_extent(
struct xfs_trans *tp,
- xfs_fsblock_t bno,
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
- return __xfs_free_extent(tp, bno, len, oinfo, type, false);
+ return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false);
}
int /* error */
@@ -211,9 +213,9 @@ xfs_buf_to_agfl_bno(
return bp->b_addr;
}
-void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
- bool skip_discard);
+ enum xfs_ag_resv_type type, bool skip_discard);
/*
* List of extents to be free "later".
@@ -224,21 +226,27 @@ struct xfs_extent_free_item {
uint64_t xefi_owner;
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ struct xfs_perag *xefi_pag;
unsigned int xefi_flags;
+ enum xfs_ag_resv_type xefi_agresv;
};
+void xfs_extent_free_get_group(struct xfs_mount *mp,
+ struct xfs_extent_free_item *xefi);
+
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
-static inline void
+static inline int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
- const struct xfs_owner_info *oinfo)
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
{
- __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index e28d93d232de..32d350e97e0f 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -421,10 +421,10 @@ xfs_attr_complete_op(
bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
args->op_flags &= ~XFS_DA_OP_REPLACE;
- if (do_replace) {
- args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+ args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+ if (do_replace)
return replace_state;
- }
+
return XFS_DAS_DONE;
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d440393b40eb..54de405cbab5 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -619,7 +619,6 @@ xfs_attr_rmtval_set_blk(
if (error)
return error;
- ASSERT(nmap == 1);
ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
(map->br_startblock != HOLESTARTBLOCK));
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 27d3121e6da9..14b0d230f61b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -21,7 +21,7 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
@@ -572,8 +572,13 @@ xfs_bmap_btree_to_extents(
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
+
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
- xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+
ip->i_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
@@ -1525,6 +1530,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1554,6 +1560,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1587,6 +1594,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1619,6 +1627,7 @@ xfs_bmap_add_extent_delay_real(
goto done;
}
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1656,6 +1665,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING:
@@ -1743,6 +1753,7 @@ xfs_bmap_add_extent_delay_real(
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
+ ASSERT(da_new <= da_old);
break;
case BMAP_RIGHT_FILLING:
@@ -1790,6 +1801,7 @@ xfs_bmap_add_extent_delay_real(
PREV.br_blockcount = temp;
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
xfs_iext_next(ifp, &bma->icur);
+ ASSERT(da_new <= da_old);
break;
case 0:
@@ -1910,11 +1922,9 @@ xfs_bmap_add_extent_delay_real(
}
/* adjust for changes in reserved delayed indirect blocks */
- if (da_new != da_old) {
- ASSERT(state == 0 || da_new < da_old);
+ if (da_new != da_old)
error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
- false);
- }
+ true);
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
@@ -3949,20 +3959,32 @@ xfs_bmapi_reserve_delalloc(
xfs_extlen_t alen;
xfs_extlen_t indlen;
int error;
- xfs_fileoff_t aoff = off;
+ xfs_fileoff_t aoff;
+ bool use_cowextszhint =
+ whichfork == XFS_COW_FORK && !prealloc;
+retry:
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
+ aoff = off;
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
if (prealloc && alen >= len)
prealloc = alen - len;
- /* Figure out the extent size, adjust alen */
- if (whichfork == XFS_COW_FORK) {
+ /*
+ * If we're targeting the COW fork but aren't creating a speculative
+ * posteof preallocation, try to expand the reservation to align with
+ * the COW extent size hint if there's sufficient free space.
+ *
+ * Unlike the data fork, the CoW cancellation functions will free all
+ * the reservations at inactivation, so we don't require that every
+ * delalloc reservation have a dirty pagecache.
+ */
+ if (use_cowextszhint) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
@@ -3981,7 +4003,7 @@ xfs_bmapi_reserve_delalloc(
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
- return error;
+ goto out;
/*
* Split changing sb for alen and indlen since they could be coming
@@ -4026,6 +4048,17 @@ out_unreserve_blocks:
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
+out:
+ if (error == -ENOSPC || error == -EDQUOT) {
+ trace_xfs_delalloc_enospc(ip, off, len);
+
+ if (prealloc || use_cowextszhint) {
+ /* retry without any preallocation */
+ use_cowextszhint = false;
+ prealloc = 0;
+ goto retry;
+ }
+ }
return error;
}
@@ -4108,8 +4141,10 @@ xfs_bmapi_allocate(
} else {
error = xfs_bmap_alloc_userdata(bma);
}
- if (error || bma->blkno == NULLFSBLOCK)
+ if (error)
return error;
+ if (bma->blkno == NULLFSBLOCK)
+ return -ENOSPC;
if (bma->flags & XFS_BMAPI_ZERO) {
error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
@@ -4289,6 +4324,15 @@ xfs_bmapi_finish(
* extent state if necessary. Details behaviour is controlled by the flags
* parameter. Only allocates blocks from a single allocation group, to avoid
* locking problems.
+ *
+ * Returns 0 on success and places the extent mappings in mval. nmaps is used
+ * as an input/output parameter where the caller specifies the maximum number
+ * of mappings that may be returned and xfs_bmapi_write passes back the number
+ * of mappings (including existing mappings) it found.
+ *
+ * Returns a negative error code on failure, including -ENOSPC when it could not
+ * allocate any blocks and -ENOSR when it did allocate blocks to convert a
+ * delalloc range, but those blocks were before the passed in range.
*/
int
xfs_bmapi_write(
@@ -4416,10 +4460,16 @@ xfs_bmapi_write(
ASSERT(len > 0);
ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma);
- if (error)
+ if (error) {
+ /*
+ * If we already allocated space in a previous
+ * iteration, return what we got so far when
+ * running out of space.
+ */
+ if (error == -ENOSPC && bma.nallocs)
+ break;
goto error0;
- if (bma.blkno == NULLFSBLOCK)
- break;
+ }
/*
* If this is a CoW allocation, record the data in
@@ -4457,7 +4507,6 @@ xfs_bmapi_write(
if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
eof = true;
}
- *nmap = n;
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
@@ -4468,7 +4517,22 @@ xfs_bmapi_write(
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
xfs_bmapi_finish(&bma, whichfork, 0);
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
- orig_nmap, *nmap);
+ orig_nmap, n);
+
+ /*
+ * When converting delayed allocations, xfs_bmapi_allocate ignores
+ * the passed in bno and always converts from the start of the found
+ * delalloc extent.
+ *
+ * To avoid a successful return with *nmap set to 0, return the magic
+ * -ENOSR error code for this particular case so that the caller can
+ * handle it.
+ */
+ if (!n) {
+ ASSERT(bma.nallocs >= *nmap);
+ return -ENOSR;
+ }
+ *nmap = n;
return 0;
error0:
xfs_bmapi_finish(&bma, whichfork, error);
@@ -4481,8 +4545,8 @@ error0:
* invocations to allocate the target offset if a large enough physical extent
* is not available.
*/
-int
-xfs_bmapi_convert_delalloc(
+static int
+xfs_bmapi_convert_one_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs_off_t offset,
@@ -4539,7 +4603,8 @@ xfs_bmapi_convert_delalloc(
if (!isnullstartblock(bma.got.br_startblock)) {
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
@@ -4575,9 +4640,6 @@ xfs_bmapi_convert_delalloc(
if (error)
goto out_finish;
- error = -ENOSPC;
- if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
- goto out_finish;
error = -EFSCORRUPTED;
if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
goto out_finish;
@@ -4588,7 +4650,8 @@ xfs_bmapi_convert_delalloc(
ASSERT(!isnullstartblock(bma.got.br_startblock));
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
@@ -4611,6 +4674,36 @@ out_trans_cancel:
return error;
}
+/*
+ * Pass in a delalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in iomap.
+ */
+int
+xfs_bmapi_convert_delalloc(
+ struct xfs_inode *ip,
+ int whichfork,
+ loff_t offset,
+ struct iomap *iomap,
+ unsigned int *seq)
+{
+ int error;
+
+ /*
+ * Attempt to allocate whatever delalloc extent currently backs offset
+ * and put the result into iomap. Allocate in a loop because it may
+ * take several attempts to allocate real blocks for a contiguous
+ * delalloc extent if free space is sufficiently fragmented.
+ */
+ do {
+ error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
+ iomap, seq);
+ if (error)
+ return error;
+ } while (iomap->offset + iomap->length <= offset);
+
+ return 0;
+}
+
int
xfs_bmapi_remap(
struct xfs_trans *tp,
@@ -4994,7 +5087,6 @@ xfs_bmap_del_extent_real(
xfs_fileoff_t del_endoff; /* first offset past del */
int do_fx; /* free extent at end of routine */
int error; /* error return value */
- int flags = 0;/* inode logging flags */
struct xfs_bmbt_irec got; /* current extent entry */
xfs_fileoff_t got_endoff; /* first offset past got */
int i; /* temp state */
@@ -5007,6 +5099,8 @@ xfs_bmap_del_extent_real(
uint32_t state = xfs_bmap_fork_to_state(whichfork);
struct xfs_bmbt_irec old;
+ *logflagsp = 0;
+
mp = ip->i_mount;
XFS_STATS_INC(mp, xs_del_exlist);
@@ -5019,7 +5113,6 @@ xfs_bmap_del_extent_real(
ASSERT(got_endoff >= del_endoff);
ASSERT(!isnullstartblock(got.br_startblock));
qfield = 0;
- error = 0;
/*
* If it's the case where the directory code is running with no block
@@ -5035,13 +5128,13 @@ xfs_bmap_del_extent_real(
del->br_startoff > got.br_startoff && del_endoff < got_endoff)
return -ENOSPC;
- flags = XFS_ILOG_CORE;
+ *logflagsp = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
if (!(bflags & XFS_BMAPI_REMAP)) {
error = xfs_rtfree_blocks(tp, del->br_startblock,
del->br_blockcount);
if (error)
- goto done;
+ return error;
}
do_fx = 0;
@@ -5056,11 +5149,9 @@ xfs_bmap_del_extent_real(
if (cur) {
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
- goto done;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
- }
+ return error;
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
if (got.br_startoff == del->br_startoff)
@@ -5077,17 +5168,15 @@ xfs_bmap_del_extent_real(
xfs_iext_prev(ifp, icur);
ifp->if_nextents--;
- flags |= XFS_ILOG_CORE;
+ *logflagsp |= XFS_ILOG_CORE;
if (!cur) {
- flags |= xfs_ilog_fext(whichfork);
+ *logflagsp |= xfs_ilog_fext(whichfork);
break;
}
if ((error = xfs_btree_delete(cur, &i)))
- goto done;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
- }
+ return error;
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
break;
case BMAP_LEFT_FILLING:
/*
@@ -5098,12 +5187,12 @@ xfs_bmap_del_extent_real(
got.br_blockcount -= del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &got);
if (!cur) {
- flags |= xfs_ilog_fext(whichfork);
+ *logflagsp |= xfs_ilog_fext(whichfork);
break;
}
error = xfs_bmbt_update(cur, &got);
if (error)
- goto done;
+ return error;
break;
case BMAP_RIGHT_FILLING:
/*
@@ -5112,12 +5201,12 @@ xfs_bmap_del_extent_real(
got.br_blockcount -= del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &got);
if (!cur) {
- flags |= xfs_ilog_fext(whichfork);
+ *logflagsp |= xfs_ilog_fext(whichfork);
break;
}
error = xfs_bmbt_update(cur, &got);
if (error)
- goto done;
+ return error;
break;
case 0:
/*
@@ -5134,18 +5223,18 @@ xfs_bmap_del_extent_real(
new.br_state = got.br_state;
new.br_startblock = del_endblock;
- flags |= XFS_ILOG_CORE;
+ *logflagsp |= XFS_ILOG_CORE;
if (cur) {
error = xfs_bmbt_update(cur, &got);
if (error)
- goto done;
+ return error;
error = xfs_btree_increment(cur, 0, &i);
if (error)
- goto done;
+ return error;
cur->bc_rec.b = new;
error = xfs_btree_insert(cur, &i);
if (error && error != -ENOSPC)
- goto done;
+ return error;
/*
* If get no-space back from btree insert, it tried a
* split, and we have a zero block reservation. Fix up
@@ -5158,33 +5247,28 @@ xfs_bmap_del_extent_real(
*/
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
- goto done;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
- }
+ return error;
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
/*
* Update the btree record back
* to the original value.
*/
error = xfs_bmbt_update(cur, &old);
if (error)
- goto done;
+ return error;
/*
* Reset the extent record back
* to the original value.
*/
xfs_iext_update_extent(ip, state, icur, &old);
- flags = 0;
- error = -ENOSPC;
- goto done;
- }
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto done;
+ *logflagsp = 0;
+ return -ENOSPC;
}
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
} else
- flags |= xfs_ilog_fext(whichfork);
+ *logflagsp |= xfs_ilog_fext(whichfork);
ifp->if_nextents++;
xfs_iext_next(ifp, icur);
@@ -5202,10 +5286,13 @@ xfs_bmap_del_extent_real(
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
} else {
- __xfs_free_extent_later(tp, del->br_startblock,
+ error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
- (bflags & XFS_BMAPI_NODISCARD) ||
- del->br_state == XFS_EXT_UNWRITTEN);
+ XFS_AG_RESV_NONE,
+ ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN));
+ if (error)
+ return error;
}
}
@@ -5220,9 +5307,7 @@ xfs_bmap_del_extent_real(
if (qfield && !(bflags & XFS_BMAPI_REMAP))
xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
-done:
- *logflagsp = flags;
- return error;
+ return 0;
}
/*
@@ -6119,39 +6204,37 @@ xfs_bmap_unmap_extent(
int
xfs_bmap_finish_one(
struct xfs_trans *tp,
- struct xfs_inode *ip,
- enum xfs_bmap_intent_type type,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t *blockcount,
- xfs_exntst_t state)
+ struct xfs_bmap_intent *bi)
{
+ struct xfs_bmbt_irec *bmap = &bi->bi_bmap;
int error = 0;
ASSERT(tp->t_firstblock == NULLFSBLOCK);
trace_xfs_bmap_deferred(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
- XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
- ip->i_ino, whichfork, startoff, *blockcount, state);
+ XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
+ bi->bi_type,
+ XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
+ bi->bi_owner->i_ino, bi->bi_whichfork,
+ bmap->br_startoff, bmap->br_blockcount,
+ bmap->br_state);
- if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
+ if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK))
return -EFSCORRUPTED;
if (XFS_TEST_ERROR(false, tp->t_mountp,
XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
- switch (type) {
+ switch (bi->bi_type) {
case XFS_BMAP_MAP:
- error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
- startblock, 0);
- *blockcount = 0;
+ error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
+ bmap->br_blockcount, bmap->br_startblock, 0);
+ bmap->br_blockcount = 0;
break;
case XFS_BMAP_UNMAP:
- error = __xfs_bunmapi(tp, ip, startoff, blockcount,
- XFS_BMAPI_REMAP, 1);
+ error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
+ &bmap->br_blockcount, XFS_BMAPI_REMAP, 1);
break;
default:
ASSERT(0);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 08c16e4edc0f..524912f276f8 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -236,10 +236,7 @@ struct xfs_bmap_intent {
struct xfs_bmbt_irec bi_bmap;
};
-int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip,
- enum xfs_bmap_intent_type type, int whichfork,
- xfs_fileoff_t startoff, xfs_fsblock_t startblock,
- xfs_filblks_t *blockcount, xfs_exntst_t state);
+int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap);
void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 18de4fbfef4e..57f401f2492d 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -285,11 +285,15 @@ xfs_bmbt_free_block(
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
struct xfs_owner_info oinfo;
+ int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
- xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
- ip->i_nblocks--;
+ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
+ XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+ ip->i_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
return 0;
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index dd75e208b543..29e3f8ccb185 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -342,9 +342,7 @@ xfs_btree_bload_drop_buf(
if (*bpp == NULL)
return;
- if (!xfs_buf_delwri_queue(*bpp, buffers_list))
- ASSERT(0);
-
+ xfs_buf_delwri_queue_here(*bpp, buffers_list);
xfs_buf_relse(*bpp);
*bpp = NULL;
}
diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h
index f0d2976050ae..5f638f711246 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.h
+++ b/fs/xfs/libxfs/xfs_btree_staging.h
@@ -37,12 +37,6 @@ struct xbtree_ifakeroot {
/* Number of bytes available for this fork in the inode. */
unsigned int if_fork_size;
-
- /* Fork format. */
- unsigned int if_format;
-
- /* Number of records. */
- unsigned int if_extents;
};
/* Cursor interactions with fake roots for inode-rooted btrees. */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e576560b46e9..12e3cca804b7 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2158,8 +2158,8 @@ xfs_da_grow_inode_int(
struct xfs_inode *dp = args->dp;
int w = args->whichfork;
xfs_rfsblock_t nblks = dp->i_nblocks;
- struct xfs_bmbt_irec map, *mapp;
- int nmap, error, got, i, mapi;
+ struct xfs_bmbt_irec map, *mapp = &map;
+ int nmap, error, got, i, mapi = 1;
/*
* Find a spot in the file space to put the new block.
@@ -2175,14 +2175,7 @@ xfs_da_grow_inode_int(
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->total, &map, &nmap);
- if (error)
- return error;
-
- ASSERT(nmap <= 1);
- if (nmap == 1) {
- mapp = &map;
- mapi = 1;
- } else if (nmap == 0 && count > 1) {
+ if (error == -ENOSPC && count > 1) {
xfs_fileoff_t b;
int c;
@@ -2199,16 +2192,13 @@ xfs_da_grow_inode_int(
args->total, &mapp[mapi], &nmap);
if (error)
goto out_free_map;
- if (nmap < 1)
- break;
mapi += nmap;
b = mapp[mapi - 1].br_startoff +
mapp[mapi - 1].br_blockcount;
}
- } else {
- mapi = 0;
- mapp = NULL;
}
+ if (error)
+ goto out_free_map;
/*
* Count the blocks we got, make sure it matches the total.
@@ -2316,10 +2306,17 @@ xfs_da3_swap_lastblock(
return error;
/*
* Copy the last block into the dead buffer and log it.
+ * On CRC-enabled file systems, also update the stamped in blkno.
*/
memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
+ if (xfs_has_crc(mp)) {
+ struct xfs_da3_blkinfo *da3 = dead_buf->b_addr;
+
+ da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf));
+ }
xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
dead_info = dead_buf->b_addr;
+
/*
* Get values from the moved block.
*/
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index dbcf58979a59..e1d5da6d8d4a 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -177,6 +177,14 @@ __xfs_dir3_data_check(
while (offset < end) {
struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
+ unsigned int reclen;
+
+ /*
+ * Are the remaining bytes large enough to hold an
+ * unused entry?
+ */
+ if (offset > end - xfs_dir2_data_unusedsize(1))
+ return __this_address;
/*
* If it's unused, look for the space in the bestfree table.
@@ -186,9 +194,13 @@ __xfs_dir3_data_check(
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
xfs_failaddr_t fa;
+ reclen = xfs_dir2_data_unusedsize(
+ be16_to_cpu(dup->length));
if (lastfree != 0)
return __this_address;
- if (offset + be16_to_cpu(dup->length) > end)
+ if (be16_to_cpu(dup->length) != reclen)
+ return __this_address;
+ if (offset + reclen > end)
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
offset)
@@ -206,10 +218,18 @@ __xfs_dir3_data_check(
be16_to_cpu(bf[2].length))
return __this_address;
}
- offset += be16_to_cpu(dup->length);
+ offset += reclen;
lastfree = 1;
continue;
}
+
+ /*
+ * This is not an unused entry. Are the remaining bytes
+ * large enough for a dirent with a single-byte name?
+ */
+ if (offset > end - xfs_dir2_data_entsize(mp, 1))
+ return __this_address;
+
/*
* It's a real entry. Validate the fields.
* If this is a block directory then make sure it's
@@ -218,9 +238,10 @@ __xfs_dir3_data_check(
*/
if (dep->namelen == 0)
return __this_address;
- if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
+ reclen = xfs_dir2_data_entsize(mp, dep->namelen);
+ if (offset + reclen > end)
return __this_address;
- if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end)
+ if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber)))
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset)
return __this_address;
@@ -244,7 +265,7 @@ __xfs_dir3_data_check(
if (i >= be32_to_cpu(btp->count))
return __this_address;
}
- offset += xfs_dir2_data_entsize(mp, dep->namelen);
+ offset += reclen;
}
/*
* Need to have seen all the entries and all the bestfree slots.
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 7404a9ff1a92..9046d08554e9 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -188,6 +188,13 @@ extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
struct dir_context *ctx, size_t bufsize);
static inline unsigned int
+xfs_dir2_data_unusedsize(
+ unsigned int len)
+{
+ return round_up(len, XFS_DIR2_DATA_ALIGN);
+}
+
+static inline unsigned int
xfs_dir2_data_entsize(
struct xfs_mount *mp,
unsigned int namelen)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 371dc07233e0..20acb8573d7a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -98,7 +98,7 @@ typedef struct xfs_sb {
uint32_t sb_blocksize; /* logical block size, bytes */
xfs_rfsblock_t sb_dblocks; /* number of data blocks */
xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
- xfs_rtblock_t sb_rextents; /* number of realtime extents */
+ xfs_rtbxlen_t sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* user-visible file system unique id */
xfs_fsblock_t sb_logstart; /* starting block of log if internal */
xfs_ino_t sb_rootino; /* root inode number */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 120dbec16f5c..d1472cbd48ff 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1827,7 +1827,7 @@ xfs_dialloc(
* might be sparse and only free the regions that are allocated as part of the
* chunk.
*/
-STATIC void
+static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -1844,10 +1844,10 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
- return;
+ return xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, sagbno),
+ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1864,6 +1864,8 @@ xfs_difree_inode_chunk(
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1;
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ int error;
+
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit);
/*
@@ -1889,8 +1891,11 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ error = xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
+ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
+ if (error)
+ return error;
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
@@ -1898,6 +1903,7 @@ xfs_difree_inode_chunk(
next:
nextbit++;
}
+ return 0;
}
STATIC int
@@ -1998,7 +2004,9 @@ xfs_difree_inobt(
goto error0;
}
- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ if (error)
+ goto error0;
} else {
xic->deleted = false;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 8c83e265770c..7125447cde1a 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -156,9 +156,11 @@ __xfs_inobt_free_block(
struct xfs_buf *bp,
enum xfs_ag_resv_type resv)
{
+ xfs_fsblock_t fsbno;
+
xfs_inobt_mod_blockcount(cur, -1);
- return xfs_free_extent(cur->bc_tp,
- XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1,
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 601b05ca5fc2..3c611c8ac158 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -365,17 +365,40 @@ xfs_dinode_verify_fork(
/*
* For fork types that can contain local data, check that the fork
* format matches the size of local data contained within the fork.
- *
- * For all types, check that when the size says the should be in extent
- * or btree format, the inode isn't claiming it is in local format.
*/
if (whichfork == XFS_DATA_FORK) {
- if (S_ISDIR(mode) || S_ISLNK(mode)) {
+ /*
+ * A directory small enough to fit in the inode must be stored
+ * in local format. The directory sf <-> extents conversion
+ * code updates the directory size accordingly. Directories
+ * being truncated have zero size and are not subject to this
+ * check.
+ */
+ if (S_ISDIR(mode)) {
+ if (dip->di_size &&
+ be64_to_cpu(dip->di_size) <= fork_size &&
+ fork_format != XFS_DINODE_FMT_LOCAL)
+ return __this_address;
+ }
+
+ /*
+ * A symlink with a target small enough to fit in the inode can
+ * be stored in extents format if xattrs were added (thus
+ * converting the data fork from shortform to remote format)
+ * and then removed.
+ */
+ if (S_ISLNK(mode)) {
if (be64_to_cpu(dip->di_size) <= fork_size &&
+ fork_format != XFS_DINODE_FMT_EXTENTS &&
fork_format != XFS_DINODE_FMT_LOCAL)
return __this_address;
}
+ /*
+ * For all types, check that when the size says the fork should
+ * be in extent or btree format, the inode isn't claiming to be
+ * in local format.
+ */
if (be64_to_cpu(dip->di_size) > fork_size &&
fork_format == XFS_DINODE_FMT_LOCAL)
return __this_address;
@@ -491,9 +514,19 @@ xfs_dinode_verify(
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
return __this_address;
- /* No zero-length symlinks/dirs. */
- if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
- return __this_address;
+ /*
+ * No zero-length symlinks/dirs unless they're unlinked and hence being
+ * inactivated.
+ */
+ if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+ if (dip->di_version > 1) {
+ if (dip->di_nlink)
+ return __this_address;
+ } else {
+ if (dip->di_onlink)
+ return __this_address;
+ }
+ }
fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 2420865f3007..a5100a11faf9 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct xlog *log);
#define xlog_check_buf_cancel_table(log) do { } while (0)
#endif
+/*
+ * Transform a regular reservation into one suitable for recovery of a log
+ * intent item. The caller's reservation @r is only read, never modified.
+ *
+ * Intent recovery only runs a single step of the transaction chain and defers
+ * the rest to a separate transaction. Therefore, we reduce logcount to 1 here
+ * to avoid livelocks if the log grant space is nearly exhausted due to the
+ * recovered intent pinning the tail. Keep the same logflags to avoid tripping
+ * asserts elsewhere. The result is a new struct returned by value.
+ */
+static inline struct xfs_trans_res
+xlog_recover_resv(const struct xfs_trans_res *r)
+{
+ struct xfs_trans_res ret = {
+ .tr_logres = r->tr_logres,
+ .tr_logcount = 1, /* recovery runs a single step */
+ .tr_logflags = r->tr_logflags,
+ };
+
+ return ret;
+}
+
#endif /* __XFS_LOG_RECOVER_H__ */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index cb035da3f990..fb05f44f6c75 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t;
* And, of course, we also need to take into account the dquot log format item
* used to describe each dquot.
*/
-#define XFS_DQUOT_LOGRES(mp) \
+#define XFS_DQUOT_LOGRES \
((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 6f7ed9288fe4..7e16e76fd2e1 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1129,8 +1129,11 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
- tmp.rc_blockcount, NULL);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+ tmp.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_error;
}
(*agbno) += tmp.rc_blockcount;
@@ -1188,8 +1191,11 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
- ext.rc_blockcount, NULL);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+ ext.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_error;
}
skip:
@@ -1213,37 +1219,33 @@ out_error:
STATIC int
xfs_refcount_adjust(
struct xfs_btree_cur *cur,
- xfs_agblock_t agbno,
- xfs_extlen_t aglen,
- xfs_agblock_t *new_agbno,
- xfs_extlen_t *new_aglen,
+ xfs_agblock_t *agbno,
+ xfs_extlen_t *aglen,
enum xfs_refc_adjust_op adj)
{
bool shape_changed;
int shape_changes = 0;
int error;
- *new_agbno = agbno;
- *new_aglen = aglen;
if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
- trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_increase(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, *agbno, *aglen);
else
- trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno, aglen);
+ trace_xfs_refcount_decrease(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno, *agbno, *aglen);
/*
* Ensure that no rcextents cross the boundary of the adjustment range.
*/
error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
- agbno, &shape_changed);
+ *agbno, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
shape_changes++;
error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
- agbno + aglen, &shape_changed);
+ *agbno + *aglen, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1253,7 +1255,7 @@ xfs_refcount_adjust(
* Try to merge with the left or right extents of the range.
*/
error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
- new_agbno, new_aglen, adj, &shape_changed);
+ agbno, aglen, adj, &shape_changed);
if (error)
goto out_error;
if (shape_changed)
@@ -1262,7 +1264,7 @@ xfs_refcount_adjust(
cur->bc_ag.refc.shape_changes++;
/* Now that we've taken care of the ends, adjust the middle extents */
- error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
+ error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
if (error)
goto out_error;
@@ -1298,21 +1300,20 @@ xfs_refcount_finish_one_cleanup(
static inline int
xfs_refcount_continue_op(
struct xfs_btree_cur *cur,
- xfs_fsblock_t startblock,
- xfs_agblock_t new_agbno,
- xfs_extlen_t new_len,
- xfs_fsblock_t *new_fsbno)
+ struct xfs_refcount_intent *ri,
+ xfs_agblock_t new_agbno)
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_perag *pag = cur->bc_ag.pag;
- if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
+ ri->ri_blockcount)))
return -EFSCORRUPTED;
- *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
- ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
- ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));
+ ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
+ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
return 0;
}
@@ -1327,11 +1328,7 @@ xfs_refcount_continue_op(
int
xfs_refcount_finish_one(
struct xfs_trans *tp,
- enum xfs_refcount_intent_type type,
- xfs_fsblock_t startblock,
- xfs_extlen_t blockcount,
- xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len,
+ struct xfs_refcount_intent *ri,
struct xfs_btree_cur **pcur)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -1339,17 +1336,16 @@ xfs_refcount_finish_one(
struct xfs_buf *agbp = NULL;
int error = 0;
xfs_agblock_t bno;
- xfs_agblock_t new_agbno;
unsigned long nr_ops = 0;
int shape_changes = 0;
struct xfs_perag *pag;
- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
- bno = XFS_FSB_TO_AGBNO(mp, startblock);
+ pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
+ bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
- trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
- type, XFS_FSB_TO_AGBNO(mp, startblock),
- blockcount);
+ trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
+ ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
+ ri->ri_blockcount);
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
error = -EIO;
@@ -1380,42 +1376,42 @@ xfs_refcount_finish_one(
}
*pcur = rcur;
- switch (type) {
+ switch (ri->ri_type) {
case XFS_REFCOUNT_INCREASE:
- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
- new_len, XFS_REFCOUNT_ADJUST_INCREASE);
+ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
+ XFS_REFCOUNT_ADJUST_INCREASE);
if (error)
goto out_drop;
- if (*new_len > 0)
- error = xfs_refcount_continue_op(rcur, startblock,
- new_agbno, *new_len, new_fsb);
+ if (ri->ri_blockcount > 0)
+ error = xfs_refcount_continue_op(rcur, ri, bno);
break;
case XFS_REFCOUNT_DECREASE:
- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
- new_len, XFS_REFCOUNT_ADJUST_DECREASE);
+ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
+ XFS_REFCOUNT_ADJUST_DECREASE);
if (error)
goto out_drop;
- if (*new_len > 0)
- error = xfs_refcount_continue_op(rcur, startblock,
- new_agbno, *new_len, new_fsb);
+ if (ri->ri_blockcount > 0)
+ error = xfs_refcount_continue_op(rcur, ri, bno);
break;
case XFS_REFCOUNT_ALLOC_COW:
- *new_fsb = startblock + blockcount;
- *new_len = 0;
- error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
+ error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
+ if (error)
+ goto out_drop;
+ ri->ri_blockcount = 0;
break;
case XFS_REFCOUNT_FREE_COW:
- *new_fsb = startblock + blockcount;
- *new_len = 0;
- error = __xfs_refcount_cow_free(rcur, bno, blockcount);
+ error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
+ if (error)
+ goto out_drop;
+ ri->ri_blockcount = 0;
break;
default:
ASSERT(0);
error = -EFSCORRUPTED;
}
- if (!error && *new_len > 0)
- trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
- bno, blockcount, new_agbno, *new_len);
+ if (!error && ri->ri_blockcount > 0)
+ trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno,
+ ri->ri_type, bno, ri->ri_blockcount);
out_drop:
xfs_perag_put(pag);
return error;
@@ -1907,8 +1903,13 @@ xfs_refcount_recover_cow_leftovers(
struct xfs_buf *agbp;
struct xfs_refcount_recovery *rr, *n;
struct list_head debris;
- union xfs_btree_irec low;
- union xfs_btree_irec high;
+ union xfs_btree_irec low = {
+ .rc.rc_domain = XFS_REFC_DOMAIN_COW,
+ };
+ union xfs_btree_irec high = {
+ .rc.rc_domain = XFS_REFC_DOMAIN_COW,
+ .rc.rc_startblock = -1U,
+ };
xfs_fsblock_t fsb;
int error;
@@ -1939,10 +1940,6 @@ xfs_refcount_recover_cow_leftovers(
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
/* Find all the leftover CoW staging extents. */
- memset(&low, 0, sizeof(low));
- memset(&high, 0, sizeof(high));
- low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
- high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris);
xfs_btree_del_cursor(cur, error);
@@ -1968,7 +1965,11 @@ xfs_refcount_recover_cow_leftovers(
rr->rr_rrec.rc_blockcount);
/* Free the block. */
- xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
+ error = xfs_free_extent_later(tp, fsb,
+ rr->rr_rrec.rc_blockcount, NULL,
+ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_trans;
error = xfs_trans_commit(tp);
if (error)
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 452f30556f5a..c633477ce3ce 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -75,9 +75,7 @@ void xfs_refcount_decrease_extent(struct xfs_trans *tp,
extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
extern int xfs_refcount_finish_one(struct xfs_trans *tp,
- enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
- xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
+ struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur);
extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index e1f789866683..fbd53b6951a9 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -106,18 +106,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
- int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC,
- XFS_AG_RESV_METADATA);
- if (error)
- return error;
-
- return error;
+ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
+ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index b56aca1e7c66..95d3599561ce 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2337,14 +2337,10 @@ xfs_rmap_query_range(
xfs_rmap_query_range_fn fn,
void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
- struct xfs_rmap_query_range_info query;
+ union xfs_btree_irec low_brec = { .r = *low_rec };
+ union xfs_btree_irec high_brec = { .r = *high_rec };
+ struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn };
- low_brec.r = *low_rec;
- high_brec.r = *high_rec;
- query.priv = priv;
- query.fn = fn;
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_rmap_query_range_helper, &query);
}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 655108a4cd05..760172a65aff 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -16,6 +16,7 @@
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
+#include "xfs_rtbitmap.h"
/*
* Realtime allocator bitmap functions shared with userspace.
@@ -1129,3 +1130,4 @@ xfs_rtalloc_extent_is_free(
*is_free = matches;
return 0;
}
+
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
new file mode 100644
index 000000000000..b89712983347
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_RTBITMAP_H__
+#define __XFS_RTBITMAP_H__
+
+/*
+ * XXX: Most of the realtime allocation functions deal in units of realtime
+ * extents, not realtime blocks. This looks funny when paired with the type
+ * name and screams for a larger cleanup.
+ */
+struct xfs_rtalloc_rec {
+ xfs_rtblock_t ar_startext; /* presumably first rt extent; TODO confirm */
+ xfs_rtbxlen_t ar_extcount; /* length in rt extents, per xfs_rtbxlen_t */
+};
+
+typedef int (*xfs_rtalloc_query_range_fn)(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv);
+
+#ifdef CONFIG_XFS_RT
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
+int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_extlen_t len, int val,
+ xfs_rtblock_t *new, int *stat);
+int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_rtblock_t limit,
+ xfs_rtblock_t *rtblock);
+int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_rtblock_t limit,
+ xfs_rtblock_t *rtblock);
+int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_extlen_t len, int val);
+int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
+ int log, xfs_rtblock_t bbno, int delta,
+ struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
+ xfs_suminfo_t *sum);
+int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
+ xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
+ xfs_fsblock_t *rsb);
+int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_extlen_t len,
+ struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
+int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *low_rec,
+ const struct xfs_rtalloc_rec *high_rec,
+ xfs_rtalloc_query_range_fn fn, void *priv);
+int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv);
+bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtblock_t start, xfs_extlen_t len,
+ bool *is_free);
+/*
+ * Free an extent in the realtime subvolume. Length is expressed in
+ * realtime extents, as is the block number.
+ */
+int /* error */
+xfs_rtfree_extent(
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_rtblock_t bno, /* starting block number to free */
+ xfs_extlen_t len); /* length of extent freed */
+
+/* Same as above, but in units of rt blocks. */
+int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen);
+
+#else /* CONFIG_XFS_RT */
+# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
+# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
+# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
+#endif /* CONFIG_XFS_RT */
+
+#endif /* __XFS_RTBITMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index d214233ef532..8e0a176b8e0b 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -25,6 +25,7 @@
#include "xfs_da_format.h"
#include "xfs_health.h"
#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -501,8 +502,9 @@ xfs_validate_sb_common(
rbmblocks = howmany_64(sbp->sb_rextents,
NBBY * sbp->sb_blocksize);
- if (sbp->sb_rextents != rexts ||
- sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
+ if (!xfs_validate_rtextents(rexts) ||
+ sbp->sb_rextents != rexts ||
+ sbp->sb_rextslog != xfs_compute_rextslog(rexts) ||
sbp->sb_rbmblocks != rbmblocks) {
xfs_notice(mp,
"realtime geometry sanity check failed");
@@ -1020,11 +1022,12 @@ xfs_log_sb(
* and hence we don't need have to update it here.
*/
if (xfs_has_lazysbcount(mp)) {
- mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+ mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
mp->m_sb.sb_ifree = min_t(uint64_t,
- percpu_counter_sum(&mp->m_ifree),
+ percpu_counter_sum_positive(&mp->m_ifree),
mp->m_sb.sb_icount);
- mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ mp->m_sb.sb_fdblocks =
+ percpu_counter_sum_positive(&mp->m_fdblocks);
}
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
@@ -1365,3 +1368,17 @@ xfs_validate_stripe_geometry(
}
return true;
}
+
+/*
+ * Compute the maximum level number of the realtime summary file, as defined by
+ * mkfs: 0 if there are no rt extents, else xfs_highbit64() of the count. The
+ * historic highbit32 on a 64-bit quantity broke rt volumes over 2^32 extents.
+ */
+uint8_t
+xfs_compute_rextslog(
+ xfs_rtbxlen_t rtextents)
+{
+ if (!rtextents)
+ return 0;
+ return xfs_highbit64(rtextents);
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 19134b23c10b..2e8e8d63d4eb 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
__s64 sunit, __s64 swidth, int sectorsize, bool silent);
+uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 5b2f27cbdb80..1bb2891b26ff 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -334,11 +334,11 @@ xfs_calc_write_reservation(
blksz);
t1 += adj;
t3 += adj;
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 1);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -406,11 +406,11 @@ xfs_calc_itruncate_reservation(
xfs_refcountbt_block_count(mp, 4),
blksz);
- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+ return XFS_DQUOT_LOGRES + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 2);
- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3));
}
unsigned int
@@ -436,7 +436,7 @@ STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max((xfs_calc_inode_res(mp, 5) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
@@ -475,7 +475,7 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_iunlink_remove_reservation(mp) +
max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
@@ -513,7 +513,7 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_iunlink_add_reservation(mp) +
max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
@@ -572,7 +572,7 @@ xfs_calc_icreate_resv_alloc(
STATIC uint
xfs_calc_icreate_reservation(xfs_mount_t *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max(xfs_calc_icreate_resv_alloc(mp),
xfs_calc_create_resv_modify(mp));
}
@@ -581,7 +581,7 @@ STATIC uint
xfs_calc_create_tmpfile_reservation(
struct xfs_mount *mp)
{
- uint res = XFS_DQUOT_LOGRES(mp);
+ uint res = XFS_DQUOT_LOGRES;
res += xfs_calc_icreate_resv_alloc(mp);
return res + xfs_calc_iunlink_add_reservation(mp);
@@ -630,7 +630,7 @@ STATIC uint
xfs_calc_ifree_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_iunlink_remove_reservation(mp) +
@@ -647,7 +647,7 @@ STATIC uint
xfs_calc_ichange_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
@@ -756,7 +756,7 @@ STATIC uint
xfs_calc_addafork_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
@@ -804,7 +804,7 @@ STATIC uint
xfs_calc_attrsetm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
@@ -844,7 +844,7 @@ STATIC uint
xfs_calc_attrrm_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
+ return XFS_DQUOT_LOGRES +
max((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
XFS_FSB_TO_B(mp, 1)) +
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 5ebdda7e1078..42fed04f038d 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -31,6 +31,7 @@ typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
typedef uint64_t xfs_fileoff_t; /* block number in a file */
typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */
typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
@@ -227,4 +228,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
xfs_fileoff_t len);
+/* Do we support an rt volume having this number of rtextents? */
+static inline bool
+xfs_validate_rtextents(
+ xfs_rtbxlen_t rtextents)
+{
+ /* No runt rt volumes */
+ if (rtextents == 0)
+ return false;
+
+ return true;
+}
+
#endif /* __XFS_TYPES_H__ */
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index b6f0c9f3f124..f51771e5c3fe 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -159,6 +159,11 @@ xchk_xattr_listent(
args.value = xchk_xattr_valuebuf(sx->sc);
args.valuelen = valuelen;
+ /*
+ * Get the attr value to ensure that lookup can find this attribute
+ * through the dabtree indexing and that remote value retrieval also
+ * works correctly.
+ */
error = xfs_attr_get_ilocked(&args);
/* ENODATA means the hash lookup failed and the attr is bad */
if (error == -ENODATA)
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index f0b9cb6506fd..45b135929144 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -647,7 +647,13 @@ xchk_bmap(
}
break;
case XFS_ATTR_FORK:
- if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
+ /*
+ * "attr" means that an attr fork was created at some point in
+ * the life of this filesystem. "attr2" means that inodes have
+ * variable-sized data/attr fork areas. Hence we only check
+ * attr here.
+ */
+ if (!xfs_has_attr(mp))
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
break;
default:
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index c18bd039fce9..e0ed0ebfdaea 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -582,7 +582,8 @@ xrep_reap_block(
else if (resv == XFS_AG_RESV_AGFL)
error = xrep_put_freelist(sc, agbno);
else
- error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
+ error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, oinfo,
+ resv);
if (agf_bp != sc->sa.agf_bp)
xfs_trans_brelse(sc->tp, agf_bp);
if (error)
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 0a3bde64c675..fad7c353ada6 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -11,9 +11,10 @@
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
+#include "xfs_sb.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index f6ffb4f248f7..9355ccad9503 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -10,6 +10,10 @@
#define DEBUG 1
#endif
+#ifdef CONFIG_XFS_DEBUG_EXPENSIVE
+#define DEBUG_EXPENSIVE 1
+#endif
+
#ifdef CONFIG_XFS_ASSERT_FATAL
#define XFS_ASSERT_FATAL 1
#endif
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 21c241e96d48..50a7f2745514 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -225,45 +225,6 @@ xfs_imap_valid(
return true;
}
-/*
- * Pass in a dellalloc extent and convert it to real extents, return the real
- * extent that maps offset_fsb in wpc->iomap.
- *
- * The current page is held locked so nothing could have removed the block
- * backing offset_fsb, although it could have moved from the COW to the data
- * fork by another thread.
- */
-static int
-xfs_convert_blocks(
- struct iomap_writepage_ctx *wpc,
- struct xfs_inode *ip,
- int whichfork,
- loff_t offset)
-{
- int error;
- unsigned *seq;
-
- if (whichfork == XFS_COW_FORK)
- seq = &XFS_WPC(wpc)->cow_seq;
- else
- seq = &XFS_WPC(wpc)->data_seq;
-
- /*
- * Attempt to allocate whatever delalloc extent currently backs offset
- * and put the result into wpc->iomap. Allocate in a loop because it
- * may take several attempts to allocate real blocks for a contiguous
- * delalloc extent if free space is sufficiently fragmented.
- */
- do {
- error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
- &wpc->iomap, seq);
- if (error)
- return error;
- } while (wpc->iomap.offset + wpc->iomap.length <= offset);
-
- return 0;
-}
-
static int
xfs_map_blocks(
struct iomap_writepage_ctx *wpc,
@@ -281,6 +242,7 @@ xfs_map_blocks(
struct xfs_iext_cursor icur;
int retries = 0;
int error = 0;
+ unsigned int *seq;
if (xfs_is_shutdown(mp))
return -EIO;
@@ -376,7 +338,19 @@ retry:
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:
- error = xfs_convert_blocks(wpc, ip, whichfork, offset);
+ /*
+ * Convert a delalloc extent to a real one. The current page is held
+ * locked so nothing could have removed the block backing offset_fsb,
+ * although it could have moved from the COW to the data fork by another
+ * thread.
+ */
+ if (whichfork == XFS_COW_FORK)
+ seq = &XFS_WPC(wpc)->cow_seq;
+ else
+ seq = &XFS_WPC(wpc)->data_seq;
+
+ error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
+ &wpc->iomap, seq);
if (error) {
/*
* If we failed to find the extent in the COW fork we might have
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 2788a6f2edcd..4a712f1565c1 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -329,6 +329,13 @@ xfs_xattri_finish_update(
goto out;
}
+ /* If an attr removal is trivially complete, we're done. */
+ if (attr->xattri_op_flags == XFS_ATTRI_OP_FLAGS_REMOVE &&
+ !xfs_inode_hasattr(args->dp)) {
+ error = 0;
+ goto out;
+ }
+
error = xfs_attr_set_iter(attr);
if (!error && attr->xattri_dela_state != XFS_DAS_DONE)
error = -EAGAIN;
@@ -503,6 +510,9 @@ xfs_attri_validate(
unsigned int op = attrp->alfi_op_flags &
XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+ if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
+ return false;
+
if (attrp->__pad != 0)
return false;
@@ -547,7 +557,7 @@ xfs_attri_item_recover(
struct xfs_inode *ip;
struct xfs_da_args *args;
struct xfs_trans *tp;
- struct xfs_trans_res tres;
+ struct xfs_trans_res resv;
struct xfs_attri_log_format *attrp;
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
int error;
@@ -594,8 +604,6 @@ xfs_attri_item_recover(
args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
XFS_DA_OP_LOGGED;
- ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb));
-
switch (attr->xattri_op_flags) {
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
@@ -608,8 +616,6 @@ xfs_attri_item_recover(
attr->xattri_dela_state = xfs_attr_init_add_state(args);
break;
case XFS_ATTRI_OP_FLAGS_REMOVE:
- if (!xfs_inode_hasattr(args->dp))
- goto out;
attr->xattri_dela_state = xfs_attr_init_remove_state(args);
break;
default:
@@ -618,8 +624,9 @@ xfs_attri_item_recover(
goto out;
}
- xfs_init_attr_trans(args, &tres, &total);
- error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp);
+ xfs_init_attr_trans(args, &resv, &total);
+ resv = xlog_recover_resv(&resv);
+ error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp);
if (error)
goto out;
@@ -710,48 +717,111 @@ xlog_recover_attri_commit_pass2(
const void *attr_value = NULL;
const void *attr_name;
size_t len;
-
- attri_formatp = item->ri_buf[0].i_addr;
- attr_name = item->ri_buf[1].i_addr;
+ unsigned int op, i = 0;
/* Validate xfs_attri_log_format before the large memory allocation */
len = sizeof(struct xfs_attri_log_format);
- if (item->ri_buf[0].i_len != len) {
+ if (item->ri_buf[i].i_len != len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
+ attri_formatp = item->ri_buf[i].i_addr;
if (!xfs_attri_validate(mp, attri_formatp)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ attri_formatp, len);
return -EFSCORRUPTED;
}
+ /* Check the number of log iovecs makes sense for the op code. */
+ op = attri_formatp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+ switch (op) {
+ case XFS_ATTRI_OP_FLAGS_SET:
+ case XFS_ATTRI_OP_FLAGS_REPLACE:
+ /* Log item, attr name, attr value */
+ if (item->ri_total != 3) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+ break;
+ case XFS_ATTRI_OP_FLAGS_REMOVE:
+ /* Log item, attr name */
+ if (item->ri_total != 2) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+ break;
+ default:
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+ i++;
+
/* Validate the attr name */
- if (item->ri_buf[1].i_len !=
+ if (item->ri_buf[i].i_len !=
xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ attri_formatp, len);
return -EFSCORRUPTED;
}
+ attr_name = item->ri_buf[i].i_addr;
if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
+ attri_formatp, len);
return -EFSCORRUPTED;
}
+ i++;
/* Validate the attr value, if present */
if (attri_formatp->alfi_value_len != 0) {
- if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
+ if (item->ri_buf[i].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
item->ri_buf[0].i_addr,
item->ri_buf[0].i_len);
return -EFSCORRUPTED;
}
- attr_value = item->ri_buf[2].i_addr;
+ attr_value = item->ri_buf[i].i_addr;
+ i++;
+ }
+
+ /*
+ * Make sure we got the correct number of buffers for the operation
+ * that we just loaded.
+ */
+ if (i != item->ri_total) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+
+ switch (op) {
+ case XFS_ATTRI_OP_FLAGS_REMOVE:
+ /* Regular remove operations operate only on names. */
+ if (attr_value != NULL || attri_formatp->alfi_value_len != 0) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+ fallthrough;
+ case XFS_ATTRI_OP_FLAGS_SET:
+ case XFS_ATTRI_OP_FLAGS_REPLACE:
+ /*
+ * Regular xattr set/remove/replace operations require a name
+ * and do not take a newname. Values are optional for set and
+ * replace.
+ */
+ if (attr_name == NULL || attri_formatp->alfi_name_len == 0) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ attri_formatp, len);
+ return -EFSCORRUPTED;
+ }
+ break;
}
/*
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 41323da523d1..1058603db3ac 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -246,18 +246,11 @@ static int
xfs_trans_log_finish_bmap_update(
struct xfs_trans *tp,
struct xfs_bud_log_item *budp,
- enum xfs_bmap_intent_type type,
- struct xfs_inode *ip,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t *blockcount,
- xfs_exntst_t state)
+ struct xfs_bmap_intent *bi)
{
int error;
- error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
- startblock, blockcount, state);
+ error = xfs_bmap_finish_one(tp, bi);
/*
* Mark the transaction dirty, even on error. This ensures the
@@ -378,25 +371,17 @@ xfs_bmap_update_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_bmap_intent *bmap;
- xfs_filblks_t count;
+ struct xfs_bmap_intent *bi;
int error;
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
- count = bmap->bi_bmap.br_blockcount;
- error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done),
- bmap->bi_type,
- bmap->bi_owner, bmap->bi_whichfork,
- bmap->bi_bmap.br_startoff,
- bmap->bi_bmap.br_startblock,
- &count,
- bmap->bi_bmap.br_state);
- if (!error && count > 0) {
- ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
- bmap->bi_bmap.br_blockcount = count;
+ bi = container_of(item, struct xfs_bmap_intent, bi_list);
+
+ error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi);
+ if (!error && bi->bi_bmap.br_blockcount > 0) {
+ ASSERT(bi->bi_type == XFS_BMAP_UNMAP);
return -EAGAIN;
}
- kmem_cache_free(xfs_bmap_intent_cache, bmap);
+ kmem_cache_free(xfs_bmap_intent_cache, bi);
return error;
}
@@ -471,17 +456,14 @@ xfs_bui_item_recover(
struct xfs_log_item *lip,
struct list_head *capture_list)
{
- struct xfs_bmbt_irec irec;
+ struct xfs_bmap_intent fake = { };
+ struct xfs_trans_res resv;
struct xfs_bui_log_item *buip = BUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
struct xfs_mount *mp = lip->li_log->l_mp;
- struct xfs_map_extent *bmap;
+ struct xfs_map_extent *map;
struct xfs_bud_log_item *budp;
- xfs_filblks_t count;
- xfs_exntst_t state;
- unsigned int bui_type;
- int whichfork;
int iext_delta;
int error = 0;
@@ -491,19 +473,18 @@ xfs_bui_item_recover(
return -EFSCORRUPTED;
}
- bmap = &buip->bui_format.bui_extents[0];
- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ map = &buip->bui_format.bui_extents[0];
+ fake.bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
XFS_ATTR_FORK : XFS_DATA_FORK;
- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+ fake.bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
- error = xlog_recover_iget(mp, bmap->me_owner, &ip);
+ error = xlog_recover_iget(mp, map->me_owner, &ip);
if (error)
return error;
/* Allocate transaction and do the work. */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
+ error = xfs_trans_alloc(mp, &resv,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
if (error)
goto err_rele;
@@ -512,34 +493,34 @@ xfs_bui_item_recover(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- if (bui_type == XFS_BMAP_MAP)
+ if (fake.bi_type == XFS_BMAP_MAP)
iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT;
else
iext_delta = XFS_IEXT_PUNCH_HOLE_CNT;
- error = xfs_iext_count_may_overflow(ip, whichfork, iext_delta);
+ error = xfs_iext_count_may_overflow(ip, fake.bi_whichfork, iext_delta);
if (error == -EFBIG)
error = xfs_iext_count_upgrade(tp, ip, iext_delta);
if (error)
goto err_cancel;
- count = bmap->me_len;
- error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
- whichfork, bmap->me_startoff, bmap->me_startblock,
- &count, state);
+ fake.bi_owner = ip;
+ fake.bi_bmap.br_startblock = map->me_startblock;
+ fake.bi_bmap.br_startoff = map->me_startoff;
+ fake.bi_bmap.br_blockcount = map->me_len;
+ fake.bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+
+ error = xfs_trans_log_finish_bmap_update(tp, budp, &fake);
if (error == -EFSCORRUPTED)
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap,
- sizeof(*bmap));
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, map,
+ sizeof(*map));
if (error)
goto err_cancel;
- if (count > 0) {
- ASSERT(bui_type == XFS_BMAP_UNMAP);
- irec.br_startblock = bmap->me_startblock;
- irec.br_blockcount = count;
- irec.br_startoff = bmap->me_startoff;
- irec.br_state = state;
- xfs_bmap_unmap_extent(tp, ip, &irec);
+ if (fake.bi_bmap.br_blockcount > 0) {
+ ASSERT(fake.bi_type == XFS_BMAP_UNMAP);
+ xfs_bmap_unmap_extent(tp, ip, &fake.bi_bmap);
}
/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 468bb61a5e46..bab8ba224e10 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -636,13 +636,11 @@ out_unlock:
/*
* Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
*/
bool
xfs_can_free_eofblocks(
- struct xfs_inode *ip,
- bool force)
+ struct xfs_inode *ip)
{
struct xfs_bmbt_irec imap;
struct xfs_mount *mp = ip->i_mount;
@@ -676,11 +674,11 @@ xfs_can_free_eofblocks(
return false;
/*
- * Do not free real preallocated or append-only files unless the file
- * has delalloc blocks and we are forced to remove them.
+ * Only free real extents for inodes with persistent preallocations or
+ * the append-only flag.
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
- if (!force || ip->i_delayed_blks == 0)
+ if (ip->i_delayed_blks == 0)
return false;
/*
@@ -734,6 +732,22 @@ xfs_free_eofblocks(
/* Wait on dio to ensure i_size has settled. */
inode_dio_wait(VFS_I(ip));
+ /*
+ * For preallocated files only free delayed allocations.
+ *
+ * Note that this means we also leave speculative preallocations in
+ * place for preallocated files.
+ */
+ if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+ if (ip->i_delayed_blks) {
+ xfs_bmap_punch_delalloc_range(ip,
+ round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+ LLONG_MAX);
+ }
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(xfs_is_shutdown(mp));
@@ -868,33 +882,32 @@ xfs_alloc_file_space(
if (error)
goto error;
- error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
- &nimaps);
- if (error)
- goto error;
-
- ip->i_diflags |= XFS_DIFLAG_PREALLOC;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- break;
-
/*
* If the allocator cannot find a single free extent large
* enough to cover the start block of the requested range,
- * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
+ * xfs_bmapi_write will return -ENOSR.
*
* In that case we simply need to keep looping with the same
* startoffset_fsb so that one of the following allocations
* will eventually reach the requested range.
*/
- if (nimaps) {
+ error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+ allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
+ &nimaps);
+ if (error) {
+ if (error != -ENOSR)
+ goto error;
+ error = 0;
+ } else {
startoffset_fsb += imapp->br_blockcount;
allocatesize_fsb -= imapp->br_blockcount;
}
+
+ ip->i_diflags |= XFS_DIFLAG_PREALLOC;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
return error;
@@ -950,14 +963,18 @@ xfs_flush_unmap_range(
xfs_off_t offset,
xfs_off_t len)
{
- struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
xfs_off_t rounding, start, end;
int error;
- rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE);
- start = round_down(offset, rounding);
- end = round_up(offset + len, rounding) - 1;
+ /*
+ * Make sure we extend the flush out to extent alignment
+ * boundaries so any extent range overlapping the start/end
+ * of the modification we are about to do is clean and idle.
+ */
+ rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
+ start = rounddown_64(offset, rounding);
+ end = roundup_64(offset + len, rounding) - 1;
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
@@ -1042,14 +1059,14 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
- struct xfs_mount *mp = ip->i_mount;
+ unsigned int rounding;
int error;
/*
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
*/
- if (xfs_can_free_eofblocks(ip, true)) {
+ if (xfs_can_free_eofblocks(ip)) {
error = xfs_free_eofblocks(ip);
if (error)
return error;
@@ -1060,11 +1077,13 @@ xfs_prepare_shift(
* with the full range of the operation. If we don't, a COW writeback
* completion could race with an insert, front merge with the start
* extent (after split) during the shift and corrupt the file. Start
- * with the block just prior to the start to stabilize the boundary.
+ * with the allocation unit just prior to the start to stabilize the
+ * boundary.
*/
- offset = round_down(offset, mp->m_sb.sb_blocksize);
+ rounding = xfs_inode_alloc_unitsize(ip);
+ offset = rounddown_64(offset, rounding);
if (offset)
- offset -= mp->m_sb.sb_blocksize;
+ offset -= rounding;
/*
* Writeback and invalidate cache for the remainder of the file as we're
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 6888078f5c31..1383019ccdb7 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */
-bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+bool xfs_can_free_eofblocks(struct xfs_inode *ip);
int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 54c774af6e1c..257945cdf63b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2040,6 +2040,14 @@ error_free:
return NULL;
}
+static inline void
+xfs_buf_list_del(
+ struct xfs_buf *bp)
+{
+ list_del_init(&bp->b_list);
+ wake_up_var(&bp->b_list);
+}
+
/*
* Cancel a delayed write list.
*
@@ -2057,7 +2065,7 @@ xfs_buf_delwri_cancel(
xfs_buf_lock(bp);
bp->b_flags &= ~_XBF_DELWRI_Q;
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
xfs_buf_relse(bp);
}
}
@@ -2111,6 +2119,34 @@ xfs_buf_delwri_queue(
}
/*
+ * Queue a buffer to this delwri list as part of a data integrity operation.
+ * If the buffer is on any other delwri list, we'll wait for that to clear
+ * so that the caller can submit the buffer for IO and wait for the result.
+ * Callers must ensure the buffer is not already on the list.
+ */
+void
+xfs_buf_delwri_queue_here(
+ struct xfs_buf *bp,
+ struct list_head *buffer_list)
+{
+ /*
+ * We need this buffer to end up on the /caller's/ delwri list, not any
+ * old list. This can happen if the buffer is marked stale (which
+ * clears DELWRI_Q) after the AIL queues the buffer to its list but
+ * before the AIL has a chance to submit the list.
+ */
+ while (!list_empty(&bp->b_list)) {
+ xfs_buf_unlock(bp);
+ wait_var_event(&bp->b_list, list_empty(&bp->b_list));
+ xfs_buf_lock(bp);
+ }
+
+ ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+
+ xfs_buf_delwri_queue(bp, buffer_list);
+}
+
+/*
* Compare function is more complex than it needs to be because
* the return value is only 32 bits and we are doing comparisons
* on 64 bit values
@@ -2172,7 +2208,7 @@ xfs_buf_delwri_submit_buffers(
* reference and remove it from the list here.
*/
if (!(bp->b_flags & _XBF_DELWRI_Q)) {
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
xfs_buf_relse(bp);
continue;
}
@@ -2192,7 +2228,7 @@ xfs_buf_delwri_submit_buffers(
list_move_tail(&bp->b_list, wait_list);
} else {
bp->b_flags |= XBF_ASYNC;
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
}
__xfs_buf_submit(bp, false);
}
@@ -2246,7 +2282,7 @@ xfs_buf_delwri_submit(
while (!list_empty(&wait_list)) {
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
- list_del_init(&bp->b_list);
+ xfs_buf_list_del(bp);
/*
* Wait on the locked buffer, check for errors and unlock and
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 549c60942208..6cf0332ba62c 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -305,6 +305,7 @@ extern void xfs_buf_stale(struct xfs_buf *bp);
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_cancel(struct list_head *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
+void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 023d4e0385dd..b02ce568de0c 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
+#include "xfs_error.h"
struct kmem_cache *xfs_buf_item_cache;
@@ -781,8 +782,39 @@ xfs_buf_item_committed(
return lsn;
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_buf_item_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ if (!bp->b_ops || !bp->b_ops->verify_struct)
+ return 0;
+ if (bip->bli_flags & XFS_BLI_STALE)
+ return 0;
+
+ fa = bp->b_ops->verify_struct(bp);
+ if (fa) {
+ xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name,
+ bp->b_addr, BBTOB(bp->b_length), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_buf_item_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
+ .iop_precommit = xfs_buf_item_precommit,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a8b2f3b278ea..6186b69be50a 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -333,7 +333,6 @@ xfs_dquot_disk_alloc(
goto err_cancel;
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(nmaps == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 6a1aae799cf1..7d19091215b0 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -17,6 +17,7 @@
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
#include "xfs_log.h"
+#include "xfs_error.h"
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
{
@@ -193,8 +194,38 @@ xfs_qm_dquot_logitem_committing(
return xfs_qm_dquot_logitem_release(lip);
}
+#ifdef DEBUG_EXPENSIVE
+static int
+xfs_qm_dquot_logitem_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_disk_dquot ddq = { };
+ xfs_failaddr_t fa;
+
+ xfs_dquot_to_disk(&ddq, dqp);
+ fa = xfs_dquot_verify(mp, &ddq, dqp->q_id);
+ if (fa) {
+ XFS_CORRUPTION_ERROR("Bad dquot during logging",
+ XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+ fa, dqp->q_id);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+
+ return 0;
+}
+#else
+# define xfs_qm_dquot_logitem_precommit NULL
+#endif
+
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
+ .iop_precommit = xfs_qm_dquot_logitem_precommit,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
.iop_unpin = xfs_qm_dquot_logitem_unpin,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d5130d1fcfae..be9f279a5c75 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -345,23 +345,29 @@ static int
xfs_trans_free_extent(
struct xfs_trans *tp,
struct xfs_efd_log_item *efdp,
- xfs_fsblock_t start_block,
- xfs_extlen_t ext_len,
- const struct xfs_owner_info *oinfo,
- bool skip_discard)
+ struct xfs_extent_free_item *xefi)
{
+ struct xfs_owner_info oinfo = { };
struct xfs_mount *mp = tp->t_mountp;
struct xfs_extent *extp;
uint next_extent;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
- start_block);
+ xefi->xefi_startblock);
int error;
- trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+ oinfo.oi_owner = xefi->xefi_owner;
+ if (xefi->xefi_flags & XFS_EFI_ATTR_FORK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
+ if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+
+ trace_xfs_bmap_free_deferred(tp->t_mountp, xefi->xefi_pag->pag_agno, 0,
+ agbno, xefi->xefi_blockcount);
+
+ error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
+ xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
+ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
- error = __xfs_free_extent(tp, start_block, ext_len,
- oinfo, XFS_AG_RESV_NONE, skip_discard);
/*
* Mark the transaction dirty, even on error. This ensures the
* transaction is aborted, which:
@@ -375,8 +381,8 @@ xfs_trans_free_extent(
next_extent = efdp->efd_next_extent;
ASSERT(next_extent < efdp->efd_format.efd_nextents);
extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = start_block;
- extp->ext_len = ext_len;
+ extp->ext_start = xefi->xefi_startblock;
+ extp->ext_len = xefi->xefi_blockcount;
efdp->efd_next_extent++;
return error;
@@ -389,14 +395,13 @@ xfs_extent_free_diff_items(
const struct list_head *a,
const struct list_head *b)
{
- struct xfs_mount *mp = priv;
struct xfs_extent_free_item *ra;
struct xfs_extent_free_item *rb;
ra = container_of(a, struct xfs_extent_free_item, xefi_list);
rb = container_of(b, struct xfs_extent_free_item, xefi_list);
- return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+
+ return ra->xefi_pag->pag_agno - rb->xefi_pag->pag_agno;
}
/* Log a free extent to the intent item. */
@@ -404,7 +409,7 @@ STATIC void
xfs_extent_free_log_item(
struct xfs_trans *tp,
struct xfs_efi_log_item *efip,
- struct xfs_extent_free_item *free)
+ struct xfs_extent_free_item *xefi)
{
uint next_extent;
struct xfs_extent *extp;
@@ -420,8 +425,8 @@ xfs_extent_free_log_item(
next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
ASSERT(next_extent < efip->efi_format.efi_nextents);
extp = &efip->efi_format.efi_extents[next_extent];
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
+ extp->ext_start = xefi->xefi_startblock;
+ extp->ext_len = xefi->xefi_blockcount;
}
static struct xfs_log_item *
@@ -433,15 +438,15 @@ xfs_extent_free_create_intent(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
- struct xfs_extent_free_item *free;
+ struct xfs_extent_free_item *xefi;
ASSERT(count > 0);
xfs_trans_add_item(tp, &efip->efi_item);
if (sort)
list_sort(mp, items, xfs_extent_free_diff_items);
- list_for_each_entry(free, items, xefi_list)
- xfs_extent_free_log_item(tp, efip, free);
+ list_for_each_entry(xefi, items, xefi_list)
+ xfs_extent_free_log_item(tp, efip, xefi);
return &efip->efi_item;
}
@@ -455,6 +460,26 @@ xfs_extent_free_create_done(
return &xfs_trans_get_efd(tp, EFI_ITEM(intent), count)->efd_item;
}
+/* Take a passive ref to the AG containing the space we're freeing. */
+void
+xfs_extent_free_get_group(
+ struct xfs_mount *mp,
+ struct xfs_extent_free_item *xefi)
+{
+ xfs_agnumber_t agno;
+
+ agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock);
+ xefi->xefi_pag = xfs_perag_get(mp, agno);
+}
+
+/* Release a passive AG ref after some freeing work. */
+static inline void
+xfs_extent_free_put_group(
+ struct xfs_extent_free_item *xefi)
+{
+ xfs_perag_put(xefi->xefi_pag);
+}
+
/* Process a free extent. */
STATIC int
xfs_extent_free_finish_item(
@@ -463,21 +488,15 @@ xfs_extent_free_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_owner_info oinfo = { };
- struct xfs_extent_free_item *free;
+ struct xfs_extent_free_item *xefi;
int error;
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- oinfo.oi_owner = free->xefi_owner;
- if (free->xefi_flags & XFS_EFI_ATTR_FORK)
- oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
- if (free->xefi_flags & XFS_EFI_BMBT_BLOCK)
- oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
- error = xfs_trans_free_extent(tp, EFD_ITEM(done),
- free->xefi_startblock,
- free->xefi_blockcount,
- &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD);
- kmem_cache_free(xfs_extfree_item_cache, free);
+ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+ error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
+
+ xfs_extent_free_put_group(xefi);
+ kmem_cache_free(xfs_extfree_item_cache, xefi);
return error;
}
@@ -494,10 +513,12 @@ STATIC void
xfs_extent_free_cancel_item(
struct list_head *item)
{
- struct xfs_extent_free_item *free;
+ struct xfs_extent_free_item *xefi;
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- kmem_cache_free(xfs_extfree_item_cache, free);
+ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+ xfs_extent_free_put_group(xefi);
+ kmem_cache_free(xfs_extfree_item_cache, xefi);
}
const struct xfs_defer_op_type xfs_extent_free_defer_type = {
@@ -523,28 +544,25 @@ xfs_agfl_free_finish_item(
struct xfs_owner_info oinfo = { };
struct xfs_mount *mp = tp->t_mountp;
struct xfs_efd_log_item *efdp = EFD_ITEM(done);
- struct xfs_extent_free_item *free;
+ struct xfs_extent_free_item *xefi;
struct xfs_extent *extp;
struct xfs_buf *agbp;
int error;
- xfs_agnumber_t agno;
xfs_agblock_t agbno;
uint next_extent;
- struct xfs_perag *pag;
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- ASSERT(free->xefi_blockcount == 1);
- agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
- oinfo.oi_owner = free->xefi_owner;
+ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+ ASSERT(xefi->xefi_blockcount == 1);
+ agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
+ oinfo.oi_owner = xefi->xefi_owner;
- trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+ trace_xfs_agfl_free_deferred(mp, xefi->xefi_pag->pag_agno, 0, agbno,
+ xefi->xefi_blockcount);
- pag = xfs_perag_get(mp, agno);
- error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
+ error = xfs_alloc_read_agf(xefi->xefi_pag, tp, 0, &agbp);
if (!error)
- error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo);
- xfs_perag_put(pag);
+ error = xfs_free_agfl_block(tp, xefi->xefi_pag->pag_agno,
+ agbno, agbp, &oinfo);
/*
* Mark the transaction dirty, even on error. This ensures the
@@ -559,11 +577,12 @@ xfs_agfl_free_finish_item(
next_extent = efdp->efd_next_extent;
ASSERT(next_extent < efdp->efd_format.efd_nextents);
extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
+ extp->ext_start = xefi->xefi_startblock;
+ extp->ext_len = xefi->xefi_blockcount;
efdp->efd_next_extent++;
- kmem_cache_free(xfs_extfree_item_cache, free);
+ xfs_extent_free_put_group(xefi);
+ kmem_cache_free(xfs_extfree_item_cache, xefi);
return error;
}
@@ -595,11 +614,11 @@ xfs_efi_item_recover(
struct xfs_log_item *lip,
struct list_head *capture_list)
{
+ struct xfs_trans_res resv;
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
struct xfs_mount *mp = lip->li_log->l_mp;
struct xfs_efd_log_item *efdp;
struct xfs_trans *tp;
- struct xfs_extent *extp;
int i;
int error = 0;
@@ -618,16 +637,27 @@ xfs_efi_item_recover(
}
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
+ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
+ error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp);
if (error)
return error;
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+ struct xfs_extent_free_item fake = {
+ .xefi_owner = XFS_RMAP_OWN_UNKNOWN,
+ .xefi_agresv = XFS_AG_RESV_NONE,
+ };
+ struct xfs_extent *extp;
+
extp = &efip->efi_format.efi_extents[i];
- error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
- extp->ext_len,
- &XFS_RMAP_OINFO_ANY_OWNER, false);
+
+ fake.xefi_startblock = extp->ext_start;
+ fake.xefi_blockcount = extp->ext_len;
+
+ xfs_extent_free_get_group(mp, &fake);
+ error = xfs_trans_free_extent(tp, efdp, &fake);
+ xfs_extent_free_put_group(&fake);
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
extp, sizeof(*extp));
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 821cb86a83bd..3c910e36da69 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -24,6 +24,7 @@
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
+#include "xfs_file.h"
#include <linux/dax.h>
#include <linux/falloc.h>
@@ -38,33 +39,25 @@ static const struct vm_operations_struct xfs_file_vm_ops;
* Decide if the given file range is aligned to the size of the fundamental
* allocation unit for the file.
*/
-static bool
+bool
xfs_is_falloc_aligned(
struct xfs_inode *ip,
loff_t pos,
long long int len)
{
- struct xfs_mount *mp = ip->i_mount;
- uint64_t mask;
-
- if (XFS_IS_REALTIME_INODE(ip)) {
- if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
- u64 rextbytes;
- u32 mod;
-
- rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
- div_u64_rem(pos, rextbytes, &mod);
- if (mod)
- return false;
- div_u64_rem(len, rextbytes, &mod);
- return mod == 0;
- }
- mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
- } else {
- mask = mp->m_sb.sb_blocksize - 1;
+ unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip);
+
+ if (!is_power_of_2(alloc_unit)) {
+ u32 mod;
+
+ div_u64_rem(pos, alloc_unit, &mod);
+ if (mod)
+ return false;
+ div_u64_rem(len, alloc_unit, &mod);
+ return mod == 0;
}
- return !((pos | len) & mask);
+ return !((pos | len) & (alloc_unit - 1));
}
/*
diff --git a/fs/xfs/xfs_file.h b/fs/xfs/xfs_file.h
new file mode 100644
index 000000000000..2ad91f755caf
--- /dev/null
+++ b/fs/xfs/xfs_file.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_FILE_H__
+#define __XFS_FILE_H__
+
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+
+bool xfs_is_falloc_aligned(struct xfs_inode *ip, loff_t pos,
+ long long int len);
+
+#endif /* __XFS_FILE_H__ */
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 062e5dc5db9f..a0668a1ef100 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -23,7 +23,7 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
/* Convert an xfs_fsmap to an fsmap. */
@@ -71,7 +71,7 @@ xfs_fsmap_owner_to_rmap(
switch (src->fmr_owner) {
case 0: /* "lowest owner id possible" */
case -1ULL: /* "highest owner id possible" */
- dest->rm_owner = 0;
+ dest->rm_owner = src->fmr_owner;
break;
case XFS_FMR_OWN_FREE:
dest->rm_owner = XFS_RMAP_OWN_NULL;
@@ -160,9 +160,18 @@ struct xfs_getfsmap_info {
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
struct xfs_perag *pag; /* AG info, if applicable */
xfs_daddr_t next_daddr; /* next daddr we expect */
+ /* daddr of low fsmap key for linear queries (rtbitmap, external log) */
+ xfs_daddr_t low_daddr;
u64 missing_owner; /* owner of holes */
u32 dev; /* device id */
- struct xfs_rmap_irec low; /* low rmap key */
+ /*
+ * Low rmap key for the query. If low.rm_blockcount is nonzero, this
+ * is the second (or later) call to retrieve the recordset in pieces.
+ * xfs_getfsmap_rec_before_start will compare all records retrieved
+ * by the rmapbt query to filter out any records that start before
+ * the last record.
+ */
+ struct xfs_rmap_irec low;
struct xfs_rmap_irec high; /* high rmap key */
bool last; /* last extent? */
};
@@ -237,16 +246,31 @@ xfs_getfsmap_format(
xfs_fsmap_from_internal(rec, xfm);
}
+static inline bool
+xfs_getfsmap_rec_before_start(
+ struct xfs_getfsmap_info *info,
+ const struct xfs_rmap_irec *rec,
+ xfs_daddr_t rec_daddr)
+{
+ if (info->low_daddr != XFS_BUF_DADDR_NULL)
+ return rec_daddr < info->low_daddr;
+ if (info->low.rm_blockcount)
+ return xfs_rmap_compare(rec, &info->low) < 0;
+ return false;
+}
+
/*
* Format a reverse mapping for getfsmap, having translated rm_startblock
- * into the appropriate daddr units.
+ * into the appropriate daddr units. Pass in a nonzero @len_daddr if the
+ * length could be larger than rm_blockcount in struct xfs_rmap_irec.
*/
STATIC int
xfs_getfsmap_helper(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info,
const struct xfs_rmap_irec *rec,
- xfs_daddr_t rec_daddr)
+ xfs_daddr_t rec_daddr,
+ xfs_daddr_t len_daddr)
{
struct xfs_fsmap fmr;
struct xfs_mount *mp = tp->t_mountp;
@@ -256,12 +280,15 @@ xfs_getfsmap_helper(
if (fatal_signal_pending(current))
return -EINTR;
+ if (len_daddr == 0)
+ len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+
/*
* Filter out records that start before our startpoint, if the
* caller requested that.
*/
- if (xfs_rmap_compare(rec, &info->low) < 0) {
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -280,7 +307,7 @@ xfs_getfsmap_helper(
info->head->fmh_entries++;
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -320,7 +347,7 @@ xfs_getfsmap_helper(
if (error)
return error;
fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
- fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ fmr.fmr_length = len_daddr;
if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
fmr.fmr_flags |= FMR_OF_PREALLOC;
if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
@@ -337,7 +364,7 @@ xfs_getfsmap_helper(
xfs_getfsmap_format(mp, &fmr, info);
out:
- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ rec_daddr += len_daddr;
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
return 0;
@@ -358,7 +385,7 @@ xfs_getfsmap_datadev_helper(
fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
- return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
}
/* Transform a bnobt irec into a fsmap */
@@ -382,7 +409,7 @@ xfs_getfsmap_datadev_bnobt_helper(
irec.rm_offset = 0;
irec.rm_flags = 0;
- return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
+ return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
}
/* Set rmap flags based on the getfsmap flags */
@@ -409,31 +436,25 @@ xfs_getfsmap_logdev(
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_rmap_irec rmap;
- int error;
+ xfs_daddr_t rec_daddr, len_daddr;
+ xfs_fsblock_t start_fsb, end_fsb;
+ uint64_t eofs;
- /* Set up search keys */
- info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
- error = xfs_fsmap_owner_to_rmap(&info->low, keys);
- if (error)
- return error;
- info->low.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+ if (keys[0].fmr_physical >= eofs)
+ return 0;
+ start_fsb = XFS_BB_TO_FSBT(mp,
+ keys[0].fmr_physical + keys[0].fmr_length);
+ end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
- error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
- if (error)
- return error;
- info->high.rm_startblock = -1U;
- info->high.rm_owner = ULLONG_MAX;
- info->high.rm_offset = ULLONG_MAX;
- info->high.rm_blockcount = 0;
- info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
- info->missing_owner = XFS_FMR_OWN_FREE;
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (keys[0].fmr_length > 0)
+ info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
+ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
- if (keys[0].fmr_physical > 0)
+ if (start_fsb > 0)
return 0;
/* Fabricate an rmap entry for the external log device. */
@@ -443,7 +464,9 @@ xfs_getfsmap_logdev(
rmap.rm_offset = 0;
rmap.rm_flags = 0;
- return xfs_getfsmap_helper(tp, info, &rmap, 0);
+ rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
+ len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
+ return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
}
#ifdef CONFIG_XFS_RT
@@ -457,72 +480,58 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
{
struct xfs_getfsmap_info *info = priv;
struct xfs_rmap_irec irec;
- xfs_daddr_t rec_daddr;
+ xfs_rtblock_t rtbno;
+ xfs_daddr_t rec_daddr, len_daddr;
+
+ rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
+ rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
+ irec.rm_startblock = rtbno;
+
+ rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
+ len_daddr = XFS_FSB_TO_BB(mp, rtbno);
+ irec.rm_blockcount = rtbno;
- irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
- rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
- irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
irec.rm_offset = 0;
irec.rm_flags = 0;
- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
}
-/* Execute a getfsmap query against the realtime device. */
+/* Execute a getfsmap query against the realtime device rtbitmap. */
STATIC int
-__xfs_getfsmap_rtdev(
+xfs_getfsmap_rtdev_rtbitmap(
struct xfs_trans *tp,
const struct xfs_fsmap *keys,
- int (*query_fn)(struct xfs_trans *,
- struct xfs_getfsmap_info *),
struct xfs_getfsmap_info *info)
{
+
+ struct xfs_rtalloc_rec alow = { 0 };
+ struct xfs_rtalloc_rec ahigh = { 0 };
struct xfs_mount *mp = tp->t_mountp;
- xfs_fsblock_t start_fsb;
- xfs_fsblock_t end_fsb;
+ xfs_rtblock_t start_rtb;
+ xfs_rtblock_t end_rtb;
uint64_t eofs;
- int error = 0;
+ int error;
- eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
if (keys[0].fmr_physical >= eofs)
return 0;
- start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
- end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
+ start_rtb = XFS_BB_TO_FSBT(mp,
+ keys[0].fmr_physical + keys[0].fmr_length);
+ end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
- /* Set up search keys */
- info->low.rm_startblock = start_fsb;
- error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
- if (error)
- return error;
- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
- info->low.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
-
- info->high.rm_startblock = end_fsb;
- error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
- if (error)
- return error;
- info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
- info->high.rm_blockcount = 0;
- xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low);
- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high);
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (keys[0].fmr_length > 0) {
+ info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
+ if (info->low_daddr >= eofs)
+ return 0;
+ }
- return query_fn(tp, info);
-}
-
-/* Actually query the realtime bitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_query(
- struct xfs_trans *tp,
- struct xfs_getfsmap_info *info)
-{
- struct xfs_rtalloc_rec alow = { 0 };
- struct xfs_rtalloc_rec ahigh = { 0 };
- struct xfs_mount *mp = tp->t_mountp;
- int error;
+ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
+ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED);
@@ -530,8 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
* Set up query parameters to return free rtextents covering the range
* we want.
*/
- alow.ar_startext = info->low.rm_startblock;
- ahigh.ar_startext = info->high.rm_startblock;
+ alow.ar_startext = start_rtb;
+ ahigh.ar_startext = end_rtb;
do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
ahigh.ar_startext++;
@@ -554,19 +563,20 @@ err:
xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED);
return error;
}
+#endif /* CONFIG_XFS_RT */
-/* Execute a getfsmap query against the realtime device rtbitmap. */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap(
- struct xfs_trans *tp,
- const struct xfs_fsmap *keys,
- struct xfs_getfsmap_info *info)
+static inline bool
+rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
{
- info->missing_owner = XFS_FMR_OWN_UNKNOWN;
- return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
- info);
+ if (!xfs_has_reflink(mp))
+ return true;
+ if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
+ return true;
+ if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
+ XFS_RMAP_UNWRITTEN))
+ return true;
+ return false;
}
-#endif /* CONFIG_XFS_RT */
/* Execute a getfsmap query against the regular data device. */
STATIC int
@@ -601,14 +611,30 @@ __xfs_getfsmap_datadev(
* low to the fsmap low key and max out the high key to the end
* of the AG.
*/
- info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
if (error)
return error;
- info->low.rm_blockcount = 0;
+ info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (info->low.rm_blockcount == 0) {
+ /* No previous record from which to continue */
+ } else if (rmap_not_shareable(mp, &info->low)) {
+ /* Last record seen was an unshareable extent */
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+
+ start_fsb += info->low.rm_blockcount;
+ if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
+ return 0;
+ } else {
+ /* Last record seen was a shareable file data extent */
+ info->low.rm_offset += info->low.rm_blockcount;
+ }
+ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+
info->high.rm_startblock = -1U;
info->high.rm_owner = ULLONG_MAX;
info->high.rm_offset = ULLONG_MAX;
@@ -659,12 +685,8 @@ __xfs_getfsmap_datadev(
* Set the AG low key to the start of the AG prior to
* moving on to the next AG.
*/
- if (pag->pag_agno == start_ag) {
- info->low.rm_startblock = 0;
- info->low.rm_owner = 0;
- info->low.rm_offset = 0;
- info->low.rm_flags = 0;
- }
+ if (pag->pag_agno == start_ag)
+ memset(&info->low, 0, sizeof(info->low));
/*
* If this is the last AG, report any gap at the end of it
@@ -791,6 +813,19 @@ xfs_getfsmap_check_keys(
struct xfs_fsmap *low_key,
struct xfs_fsmap *high_key)
{
+ if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+ if (low_key->fmr_offset)
+ return false;
+ }
+ if (high_key->fmr_flags != -1U &&
+ (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
+ FMR_OF_EXTENT_MAP))) {
+ if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
+ return false;
+ }
+ if (high_key->fmr_length && high_key->fmr_length != -1ULL)
+ return false;
+
if (low_key->fmr_device > high_key->fmr_device)
return false;
if (low_key->fmr_device < high_key->fmr_device)
@@ -834,15 +869,15 @@ xfs_getfsmap_check_keys(
* ----------------
* There are multiple levels of keys and counters at work here:
* xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
- * these reflect fs-wide sector addrs.
+ * these reflect fs-wide sector addrs.
* dkeys -- fmh_keys used to query each device;
- * these are fmh_keys but w/ the low key
- * bumped up by fmr_length.
+ * these are fmh_keys but w/ the low key
+ * bumped up by fmr_length.
* xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
* is how we detect gaps in the fsmap
records and report them.
* xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
- * dkeys; used to query the metadata.
+ * dkeys; used to query the metadata.
*/
int
xfs_getfsmap(
@@ -863,6 +898,8 @@ xfs_getfsmap(
if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
return -EINVAL;
+ if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
+ return -EINVAL;
use_rmap = xfs_has_rmapbt(mp) &&
has_capability_noaudit(current, CAP_SYS_ADMIN);
@@ -901,26 +938,15 @@ xfs_getfsmap(
* blocks could be mapped to several other files/offsets.
* According to rmapbt record ordering, the minimal next
* possible record for the block range is the next starting
- * offset in the same inode. Therefore, bump the file offset to
- * continue the search appropriately. For all other low key
- * mapping types (attr blocks, metadata), bump the physical
- * offset as there can be no other mapping for the same physical
- * block range.
+ * offset in the same inode. Therefore, each fsmap backend bumps
+ * the file offset to continue the search appropriately. For
+ * all other low key mapping types (attr blocks, metadata), each
+ * fsmap backend bumps the physical offset as there can be no
+ * other mapping for the same physical block range.
*/
dkeys[0] = head->fmh_keys[0];
- if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
- dkeys[0].fmr_physical += dkeys[0].fmr_length;
- dkeys[0].fmr_owner = 0;
- if (dkeys[0].fmr_offset)
- return -EINVAL;
- } else
- dkeys[0].fmr_offset += dkeys[0].fmr_length;
- dkeys[0].fmr_length = 0;
memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
- if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
- return -EINVAL;
-
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
info.fsmap_recs = fsmap_recs;
@@ -960,6 +986,8 @@ xfs_getfsmap(
info.dev = handlers[i].dev;
info.last = false;
info.pag = NULL;
+ info.low_daddr = XFS_BUF_DADDR_NULL;
+ info.low.rm_blockcount = 0;
error = handlers[i].fn(tp, dkeys, &info);
if (error)
break;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 77b14f788214..96e9d64fbe62 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -153,7 +153,7 @@ xfs_growfs_data_private(
(delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0,
XFS_TRANS_RESERVE, &tp);
if (error)
- return error;
+ goto out_free_unused_perag;
last_pag = xfs_perag_get(mp, oagcount - 1);
if (delta > 0) {
@@ -227,6 +227,9 @@ xfs_growfs_data_private(
out_trans_cancel:
xfs_trans_cancel(tp);
+out_free_unused_perag:
+ if (nagcount > oagcount)
+ xfs_free_unused_perag_range(mp, oagcount, nagcount);
return error;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 6df826fc787c..586d26c05160 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1186,7 +1186,7 @@ xfs_inode_free_eofblocks(
}
*lockflags |= XFS_IOLOCK_EXCL;
- if (xfs_can_free_eofblocks(ip, false))
+ if (xfs_can_free_eofblocks(ip))
return xfs_free_eofblocks(ip);
/* inode could be preallocated or append-only */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 26961b0dae03..3ccbc31767b3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -983,10 +983,12 @@ xfs_create(
prid = xfs_get_initial_prid(dp);
/*
- * Make sure that we have allocated dquot(s) on disk.
+ * Make sure that we have allocated dquot(s) on disk. The uid/gid
+ * computation code must match what the VFS uses to assign i_[ug]id.
+ * INHERIT adjusts the gid computation for setgid/grpid systems.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))),
+ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1132,10 +1134,12 @@ xfs_create_tmpfile(
prid = xfs_get_initial_prid(dp);
/*
- * Make sure that we have allocated dquot(s) on disk.
+ * Make sure that we have allocated dquot(s) on disk. The uid/gid
+ * computation code must match what the VFS uses to assign i_[ug]id.
+ * INHERIT adjusts the gid computation for setgid/grpid systems.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))),
+ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1459,7 +1463,7 @@ xfs_release(
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
return 0;
- if (xfs_can_free_eofblocks(ip, false)) {
+ if (xfs_can_free_eofblocks(ip)) {
/*
* Check if the inode is being opened, written and closed
* frequently and we have delayed allocation blocks outstanding
@@ -1675,15 +1679,13 @@ xfs_inode_needs_inactive(
/*
* This file isn't being freed, so check if there are post-eof blocks
- * to free. @force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with broken
- * free space accounting.
+ * to free.
*
* Note: don't bother with iolock here since lockdep complains about
* acquiring it in reclaim context. We have the only reference to the
* inode at this point anyways.
*/
- return xfs_can_free_eofblocks(ip, true);
+ return xfs_can_free_eofblocks(ip);
}
/*
@@ -1734,15 +1736,11 @@ xfs_inactive(
if (VFS_I(ip)->i_nlink != 0) {
/*
- * force is true because we are evicting an inode from the
- * cache. Post-eof blocks must be freed, lest we end up with
- * broken free space accounting.
- *
* Note: don't bother with iolock here since lockdep complains
* about acquiring it in reclaim context. We have the only
* reference to the inode at this point anyways.
*/
- if (xfs_can_free_eofblocks(ip, true))
+ if (xfs_can_free_eofblocks(ip))
error = xfs_free_eofblocks(ip);
goto out;
@@ -3777,3 +3775,16 @@ xfs_inode_reload_unlinked(
return error;
}
+
+/* Returns the size of the fundamental allocation unit for a file, in bytes. */
+unsigned int
+xfs_inode_alloc_unitsize(
+ struct xfs_inode *ip)
+{
+ unsigned int blocks = 1;
+
+ if (XFS_IS_REALTIME_INODE(ip))
+ blocks = ip->i_mount->m_sb.sb_rextsize;
+
+ return XFS_FSB_TO_B(ip->i_mount, blocks);
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c177c92f3aa5..c4f426eadf8e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -622,4 +622,6 @@ xfs_inode_unlinked_incomplete(
int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
int xfs_inode_reload_unlinked(struct xfs_inode *ip);
+unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip);
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91c847a84e10..a734ca8d8f03 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -36,6 +36,36 @@ xfs_inode_item_sort(
return INODE_ITEM(lip)->ili_inode->i_ino;
}
+#ifdef DEBUG_EXPENSIVE
+static void
+xfs_inode_item_precommit_check(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dinode *dip;
+ xfs_failaddr_t fa;
+
+ dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS);
+ if (!dip) {
+ ASSERT(dip != NULL);
+ return;
+ }
+
+ xfs_inode_to_disk(ip, dip, 0);
+ xfs_dinode_calc_crc(mp, dip);
+ fa = xfs_dinode_verify(mp, ip->i_ino, dip);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), fa);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ ASSERT(fa == NULL);
+ }
+ kfree(dip);
+}
+#else
+# define xfs_inode_item_precommit_check(ip) ((void)0)
+#endif
+
/*
* Prior to finally logging the inode, we have to ensure that all the
* per-modification inode state changes are applied. This includes VFS inode
@@ -168,6 +198,8 @@ xfs_inode_item_precommit(
iip->ili_fields |= (flags | iip->ili_last_fields);
spin_unlock(&iip->ili_lock);
+ xfs_inode_item_precommit_check(ip);
+
/*
* We are done with the log item transaction dirty state, so clear it so
* that it doesn't pollute future transactions.
@@ -556,6 +588,9 @@ xfs_inode_to_log_dinode(
memset(to->di_pad2, 0, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
to->di_v3_pad = 0;
+
+ /* dummy value for initialisation */
+ to->di_crc = 0;
} else {
to->di_version = 2;
to->di_flushiter = ip->i_flushiter;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c7cb496dc345..ef3dc0778566 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -38,6 +38,7 @@
#include "xfs_reflink.h"
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
+#include "xfs_file.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -1127,6 +1128,17 @@ xfs_ioctl_setattr_xflags(
/* Can't change realtime flag if any extents are allocated. */
if (ip->i_df.if_nextents || ip->i_delayed_blks)
return -EINVAL;
+
+ /*
+ * If S_DAX is enabled on this file, we can only switch the
+ * device if both support fsdax. We can't update S_DAX because
+ * there might be other threads walking down the access paths.
+ */
+ if (IS_DAX(VFS_I(ip)) &&
+ (mp->m_ddev_targp->bt_daxdev == NULL ||
+ (mp->m_rtdev_targp &&
+ mp->m_rtdev_targp->bt_daxdev == NULL)))
+ return -EINVAL;
}
if (rtflag) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ab5512c0bcf7..28a1c19dfdb3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -309,14 +309,6 @@ xfs_iomap_write_direct(
if (error)
goto out_unlock;
- /*
- * Copy any maps to caller's array and return any error.
- */
- if (nimaps == 0) {
- error = -ENOSPC;
- goto out_unlock;
- }
-
if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
error = xfs_alert_fsblock_zero(ip, imap);
@@ -1005,6 +997,24 @@ xfs_buffered_write_iomap_begin(
}
/*
+ * For zeroing, trim a delalloc extent that extends beyond the EOF
+ * block. If it starts beyond the EOF block, convert it to an
+ * unwritten extent.
+ */
+ if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb &&
+ isnullstartblock(imap.br_startblock)) {
+ xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+ if (offset_fsb >= eof_fsb)
+ goto convert_delay;
+ if (end_fsb > eof_fsb) {
+ end_fsb = eof_fsb;
+ xfs_trim_extent(&imap, offset_fsb,
+ end_fsb - offset_fsb);
+ }
+ }
+
+ /*
* Search the COW fork extent list even if we did not find a data fork
* extent. This serves two purposes: first this implements the
* speculative preallocation using cowextsize, so that we also unshare
@@ -1105,47 +1115,48 @@ xfs_buffered_write_iomap_begin(
}
}
-retry:
- error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
- end_fsb - offset_fsb, prealloc_blocks,
- allocfork == XFS_DATA_FORK ? &imap : &cmap,
- allocfork == XFS_DATA_FORK ? &icur : &ccur,
- allocfork == XFS_DATA_FORK ? eof : cow_eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
- trace_xfs_delalloc_enospc(ip, offset, count);
- if (prealloc_blocks) {
- prealloc_blocks = 0;
- goto retry;
- }
- fallthrough;
- default:
- goto out_unlock;
- }
-
if (allocfork == XFS_COW_FORK) {
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &cmap,
+ &ccur, cow_eof);
+ if (error)
+ goto out_unlock;
+
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
goto found_cow;
}
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
+ eof);
+ if (error)
+ goto out_unlock;
+
/*
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
* them out if the write happens to fail.
*/
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
found_imap:
seq = xfs_iomap_inode_sequence(ip, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+convert_delay:
+ xfs_iunlock(ip, lockmode);
+ truncate_pagecache(inode, offset);
+ error = xfs_bmapi_convert_delalloc(ip, XFS_DATA_FORK, offset,
+ iomap, NULL);
+ if (error)
+ return error;
+
+ trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &imap);
+ return 0;
+
found_cow:
seq = xfs_iomap_inode_sequence(ip, 0);
if (imap.br_startoff <= offset_fsb) {
@@ -1153,17 +1164,17 @@ found_cow:
if (error)
goto out_unlock;
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
IOMAP_F_SHARED, seq);
}
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(ip, lockmode);
return error;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 6fbdc0a19e54..9ca1b8bf1f05 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -25,6 +25,7 @@
#include "xfs_error.h"
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
+#include "xfs_file.h"
#include <linux/posix_acl.h>
#include <linux/security.h>
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index e570dcb5df8d..73ff92355eaa 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -8,9 +8,6 @@
struct xfs_inode;
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 858e3e9eb4a8..dfd7b824e32b 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -252,17 +252,12 @@ static int
xfs_trans_log_finish_refcount_update(
struct xfs_trans *tp,
struct xfs_cud_log_item *cudp,
- enum xfs_refcount_intent_type type,
- xfs_fsblock_t startblock,
- xfs_extlen_t blockcount,
- xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len,
+ struct xfs_refcount_intent *ri,
struct xfs_btree_cur **pcur)
{
int error;
- error = xfs_refcount_finish_one(tp, type, startblock,
- blockcount, new_fsb, new_len, pcur);
+ error = xfs_refcount_finish_one(tp, ri, pcur);
/*
* Mark the transaction dirty, even on error. This ensures the
@@ -378,25 +373,20 @@ xfs_refcount_update_finish_item(
struct list_head *item,
struct xfs_btree_cur **state)
{
- struct xfs_refcount_intent *refc;
- xfs_fsblock_t new_fsb;
- xfs_extlen_t new_aglen;
+ struct xfs_refcount_intent *ri;
int error;
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
- error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done),
- refc->ri_type, refc->ri_startblock, refc->ri_blockcount,
- &new_fsb, &new_aglen, state);
+ ri = container_of(item, struct xfs_refcount_intent, ri_list);
+ error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri,
+ state);
/* Did we run out of reservation? Requeue what we didn't finish. */
- if (!error && new_aglen > 0) {
- ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
- refc->ri_type == XFS_REFCOUNT_DECREASE);
- refc->ri_startblock = new_fsb;
- refc->ri_blockcount = new_aglen;
+ if (!error && ri->ri_blockcount > 0) {
+ ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE ||
+ ri->ri_type == XFS_REFCOUNT_DECREASE);
return -EAGAIN;
}
- kmem_cache_free(xfs_refcount_intent_cache, refc);
+ kmem_cache_free(xfs_refcount_intent_cache, ri);
return error;
}
@@ -463,18 +453,14 @@ xfs_cui_item_recover(
struct xfs_log_item *lip,
struct list_head *capture_list)
{
- struct xfs_bmbt_irec irec;
+ struct xfs_trans_res resv;
struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
- struct xfs_phys_extent *refc;
struct xfs_cud_log_item *cudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
struct xfs_mount *mp = lip->li_log->l_mp;
- xfs_fsblock_t new_fsb;
- xfs_extlen_t new_len;
unsigned int refc_type;
bool requeue_only = false;
- enum xfs_refcount_intent_type type;
int i;
int error = 0;
@@ -505,14 +491,18 @@ xfs_cui_item_recover(
* doesn't fit. We need to reserve enough blocks to handle a
* full btree split on either end of the refcount range.
*/
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
+ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
+ error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0,
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
cudp = xfs_trans_get_cud(tp, cuip);
for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
+ struct xfs_refcount_intent fake = { };
+ struct xfs_phys_extent *refc;
+
refc = &cuip->cui_format.cui_extents[i];
refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
switch (refc_type) {
@@ -520,7 +510,7 @@ xfs_cui_item_recover(
case XFS_REFCOUNT_DECREASE:
case XFS_REFCOUNT_ALLOC_COW:
case XFS_REFCOUNT_FREE_COW:
- type = refc_type;
+ fake.ri_type = refc_type;
break;
default:
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
@@ -529,13 +519,12 @@ xfs_cui_item_recover(
error = -EFSCORRUPTED;
goto abort_error;
}
- if (requeue_only) {
- new_fsb = refc->pe_startblock;
- new_len = refc->pe_len;
- } else
+
+ fake.ri_startblock = refc->pe_startblock;
+ fake.ri_blockcount = refc->pe_len;
+ if (!requeue_only)
error = xfs_trans_log_finish_refcount_update(tp, cudp,
- type, refc->pe_startblock, refc->pe_len,
- &new_fsb, &new_len, &rcur);
+ &fake, &rcur);
if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
&cuip->cui_format,
@@ -544,10 +533,13 @@ xfs_cui_item_recover(
goto abort_error;
/* Requeue what we didn't finish. */
- if (new_len > 0) {
- irec.br_startblock = new_fsb;
- irec.br_blockcount = new_len;
- switch (type) {
+ if (fake.ri_blockcount > 0) {
+ struct xfs_bmbt_irec irec = {
+ .br_startblock = fake.ri_startblock,
+ .br_blockcount = fake.ri_blockcount,
+ };
+
+ switch (fake.ri_type) {
case XFS_REFCOUNT_INCREASE:
xfs_refcount_increase_extent(tp, &irec);
break;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cbdc23217a42..d539487eaf1a 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -431,13 +431,6 @@ xfs_reflink_fill_cow_hole(
if (error)
return error;
- /*
- * Allocation succeeded but the requested range was not even partially
- * satisfied? Bail out!
- */
- if (nimaps == 0)
- return -ENOSPC;
-
convert:
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
@@ -500,13 +493,6 @@ xfs_reflink_fill_delalloc(
error = xfs_trans_commit(tp);
if (error)
return error;
-
- /*
- * Allocation succeeded but the requested range was not even
- * partially satisfied? Bail out!
- */
- if (nimaps == 0)
- return -ENOSPC;
} while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
@@ -618,8 +604,11 @@ xfs_reflink_cancel_cow_blocks(
xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
del.br_blockcount);
- xfs_free_extent_later(*tpp, del.br_startblock,
- del.br_blockcount, NULL);
+ error = xfs_free_extent_later(*tpp, del.br_startblock,
+ del.br_blockcount, NULL,
+ XFS_AG_RESV_NONE);
+ if (error)
+ break;
/* Roll the transaction */
error = xfs_defer_finish(tpp);
@@ -729,12 +718,6 @@ xfs_reflink_end_cow_extent(
int nmaps;
int error;
- /* No COW extents? That's easy! */
- if (ifp->if_bytes == 0) {
- *offset_fsb = end_fsb;
- return 0;
- }
-
resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
XFS_TRANS_RESERVE, &tp);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 534504ede1a3..2043cea261c0 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -492,6 +492,7 @@ xfs_rui_item_recover(
struct xfs_log_item *lip,
struct list_head *capture_list)
{
+ struct xfs_trans_res resv;
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
struct xfs_map_extent *rmap;
struct xfs_rud_log_item *rudp;
@@ -519,8 +520,9 @@ xfs_rui_item_recover(
}
}
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
- mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
+ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
+ error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0,
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
rudp = xfs_trans_get_rud(tp, ruip);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 0bfbbc1dd0da..fc21b4e81ade 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -19,6 +19,7 @@
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
#include "xfs_sb.h"
+#include "xfs_rtbitmap.h"
/*
* Read and return the summary information for a given extent size,
@@ -317,7 +318,7 @@ xfs_rtallocate_extent_block(
/*
* Searched the whole thing & didn't find a maxlen free extent.
*/
- if (minlen < maxlen && besti != -1) {
+ if (minlen <= maxlen && besti != -1) {
xfs_extlen_t p; /* amount to trim length by */
/*
@@ -839,8 +840,6 @@ xfs_growfs_rt_alloc(
nmap = 1;
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
XFS_BMAPI_METADATA, 0, &map, &nmap);
- if (!error && nmap < 1)
- error = -ENOSPC;
if (error)
goto out_trans_cancel;
/*
@@ -917,6 +916,39 @@ xfs_alloc_rsum_cache(
}
/*
+ * If we changed the rt extent size (meaning there was no rt volume previously)
+ * and the root directory had EXTSZINHERIT and RTINHERIT set, it's possible
+ * that the extent size hint on the root directory is no longer congruent with
+ * the new rt extent size. Log the rootdir inode to fix this.
+ *
+ * The root directory's IOLOCK is held exclusively for the whole operation.
+ * Returns 0 when either inherit flag is clear (nothing is logged) or when
+ * the transaction commits; otherwise returns the error from transaction
+ * allocation or commit.
+ */
+static int
+xfs_growfs_rt_fixup_extsize(
+ struct xfs_mount *mp)
+{
+ struct xfs_inode *ip = mp->m_rootip; /* the root directory inode */
+ struct xfs_trans *tp;
+ int error = 0; /* stays 0 on the nothing-to-do path */
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ if (!(ip->i_diflags & XFS_DIFLAG_RTINHERIT) ||
+ !(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT))
+ goto out_iolock; /* both flags must be set for the fixup to apply */
+
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false,
+ &tp); /* NOTE(review): presumably returns with the ILOCK held on success -- confirm */
+ if (error)
+ goto out_iolock; /* only the IOLOCK is dropped on this path */
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* log the inode with the XFS_ILOG_CORE flag */
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL); /* ILOCK is never taken explicitly in this function */
+
+out_iolock:
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+
+/*
* Visible (exported) functions.
*/
@@ -945,6 +977,7 @@ xfs_growfs_rt(
xfs_sb_t *sbp; /* old superblock */
xfs_fsblock_t sumbno; /* summary block number */
uint8_t *rsum_cache; /* old summary cache */
+ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize;
sbp = &mp->m_sb;
@@ -954,34 +987,39 @@ xfs_growfs_rt(
/* Needs to have been mounted with an rt device. */
if (!XFS_IS_REALTIME_MOUNT(mp))
return -EINVAL;
+
+ if (!mutex_trylock(&mp->m_growlock))
+ return -EWOULDBLOCK;
/*
* Mount should fail if the rt bitmap/summary files don't load, but
* we'll check anyway.
*/
+ error = -EINVAL;
if (!mp->m_rbmip || !mp->m_rsumip)
- return -EINVAL;
+ goto out_unlock;
/* Shrink not supported. */
if (in->newblocks <= sbp->sb_rblocks)
- return -EINVAL;
+ goto out_unlock;
/* Can only change rt extent size when adding rt volume. */
if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
- return -EINVAL;
+ goto out_unlock;
/* Range check the extent size. */
if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
- return -EINVAL;
+ goto out_unlock;
/* Unsupported realtime features. */
+ error = -EOPNOTSUPP;
if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
- return -EOPNOTSUPP;
+ goto out_unlock;
nrblocks = in->newblocks;
error = xfs_sb_validate_fsb_count(sbp, nrblocks);
if (error)
- return error;
+ goto out_unlock;
/*
* Read in the last block of the device, make sure it exists.
*/
@@ -989,7 +1027,7 @@ xfs_growfs_rt(
XFS_FSB_TO_BB(mp, nrblocks - 1),
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
if (error)
- return error;
+ goto out_unlock;
xfs_buf_relse(bp);
/*
@@ -997,8 +1035,12 @@ xfs_growfs_rt(
*/
nrextents = nrblocks;
do_div(nrextents, in->extsize);
+ if (!xfs_validate_rtextents(nrextents)) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
- nrextslog = xfs_highbit32(nrextents);
+ nrextslog = xfs_compute_rextslog(nrextents);
nrsumlevels = nrextslog + 1;
nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
@@ -1008,8 +1050,11 @@ xfs_growfs_rt(
* the log. This prevents us from getting a log overflow,
* since we'll log basically the whole summary file at once.
*/
- if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
- return -EINVAL;
+ if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Get the old block counts for bitmap and summary inodes.
* These can't change since other growfs callers are locked out.
@@ -1021,10 +1066,10 @@ xfs_growfs_rt(
*/
error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
if (error)
- return error;
+ goto out_unlock;
error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
if (error)
- return error;
+ goto out_unlock;
rsum_cache = mp->m_rsum_cache;
if (nrbmblocks != sbp->sb_rbmblocks)
@@ -1060,13 +1105,16 @@ xfs_growfs_rt(
nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
ASSERT(nsbp->sb_rextents != 0);
- nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
+ nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents);
nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
nrsumsize =
(uint)sizeof(xfs_suminfo_t) * nrsumlevels *
nsbp->sb_rbmblocks;
nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
+ /* recompute growfsrt reservation from new rsumsize */
+ xfs_trans_resv_calc(nmp, &nmp->m_resv);
+
/*
* Start a transaction, get the log reservation.
*/
@@ -1150,6 +1198,8 @@ error_cancel:
*/
mp->m_rsumlevels = nrsumlevels;
mp->m_rsumsize = nrsumsize;
+ /* recompute growfsrt reservation from new rsumsize */
+ xfs_trans_resv_calc(mp, &mp->m_resv);
error = xfs_trans_commit(tp);
if (error)
@@ -1161,6 +1211,12 @@ error_cancel:
if (error)
goto out_free;
+ if (old_rextsize != in->extsize) {
+ error = xfs_growfs_rt_fixup_extsize(mp);
+ if (error)
+ goto out_free;
+ }
+
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
@@ -1184,6 +1240,8 @@ out_free:
}
}
+out_unlock:
+ mutex_unlock(&mp->m_growlock);
return error;
}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 65c284e9d33e..11859c259a1c 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -11,22 +11,6 @@
struct xfs_mount;
struct xfs_trans;
-/*
- * XXX: Most of the realtime allocation functions deal in units of realtime
- * extents, not realtime blocks. This looks funny when paired with the type
- * name and screams for a larger cleanup.
- */
-struct xfs_rtalloc_rec {
- xfs_rtblock_t ar_startext;
- xfs_rtblock_t ar_extcount;
-};
-
-typedef int (*xfs_rtalloc_query_range_fn)(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *rec,
- void *priv);
-
#ifdef CONFIG_XFS_RT
/*
* Function prototypes for exported functions.
@@ -48,19 +32,6 @@ xfs_rtallocate_extent(
xfs_extlen_t prod, /* extent product factor */
xfs_rtblock_t *rtblock); /* out: start block allocated */
-/*
- * Free an extent in the realtime subvolume. Length is expressed in
- * realtime extents, as is the block number.
- */
-int /* error */
-xfs_rtfree_extent(
- struct xfs_trans *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to free */
- xfs_extlen_t len); /* length of extent freed */
-
-/* Same as above, but in units of rt blocks. */
-int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
- xfs_filblks_t rtlen);
/*
* Initialize realtime fields in the mount structure.
@@ -102,55 +73,11 @@ xfs_growfs_rt(
struct xfs_mount *mp, /* file system mount structure */
xfs_growfs_rt_t *in); /* user supplied growfs struct */
-/*
- * From xfs_rtbitmap.c
- */
-int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
-int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val,
- xfs_rtblock_t *new, int *stat);
-int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val);
-int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
- int log, xfs_rtblock_t bbno, int delta,
- struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
- xfs_suminfo_t *sum);
-int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
- xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
- xfs_fsblock_t *rsb);
-int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len,
- struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *low_rec,
- const struct xfs_rtalloc_rec *high_rec,
- xfs_rtalloc_query_range_fn fn, void *priv);
-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtalloc_query_range_fn fn,
- void *priv);
-bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len,
- bool *is_free);
int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
#else
# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS)
-# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
-# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS)
# define xfs_growfs_rt(mp,in) (-ENOSYS)
-# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
-# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
-# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
-# define xfs_verify_rtbno(m, r) (false)
-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
# define xfs_rtalloc_reinit_frextents(m) (0)
static inline int /* error */
xfs_rtmount_init(
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 8389f3ef88ef..78bd02a98aa5 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -191,10 +191,12 @@ xfs_symlink(
prid = xfs_get_initial_prid(dp);
/*
- * Make sure that we have allocated dquot(s) on disk.
+ * Make sure that we have allocated dquot(s) on disk. The uid/gid
+ * computation code must match what the VFS uses to assign i_[ug]id.
+ * INHERIT adjusts the gid computation for setgid/grpid systems.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
- mapped_fsgid(mnt_userns, &init_user_ns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))),
+ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0cd62031e53f..a9e3081b6625 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3208,17 +3208,14 @@ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred);
TRACE_EVENT(xfs_refcount_finish_one_leftover,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- int type, xfs_agblock_t agbno, xfs_extlen_t len,
- xfs_agblock_t new_agbno, xfs_extlen_t new_len),
- TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len),
+ int type, xfs_agblock_t agbno, xfs_extlen_t len),
+ TP_ARGS(mp, agno, type, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(int, type)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
- __field(xfs_agblock_t, new_agbno)
- __field(xfs_extlen_t, new_len)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
@@ -3226,17 +3223,13 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover,
__entry->type = type;
__entry->agbno = agbno;
__entry->len = len;
- __entry->new_agbno = new_agbno;
- __entry->new_len = new_len;
),
- TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x",
+ TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->agno,
__entry->agbno,
- __entry->len,
- __entry->new_agbno,
- __entry->new_len)
+ __entry->len)
);
/* simple inode-based error/%ip tracepoint class */
@@ -3498,6 +3491,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+DECLARE_EVENT_CLASS(xfs_fsmap_linear_class, /* event class recording a mount, a key device and a block number */
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
+ TP_ARGS(mp, keydev, bno),
+ TP_STRUCT__entry(
+ __field(dev_t, dev) /* filled from mp->m_super->s_dev */
+ __field(dev_t, keydev) /* decoded form of the u32 keydev argument */
+ __field(xfs_fsblock_t, bno) /* the bno argument, stored as xfs_fsblock_t */
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev); /* convert the u32 value to a dev_t */
+ __entry->bno = bno;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d bno 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->bno)
+)
+#define DEFINE_FSMAP_LINEAR_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_linear_class, name, \
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
+ TP_ARGS(mp, keydev, bno))
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear); /* both events share the class layout above */
+DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
+
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
TP_ARGS(mp, fsmap),