Diffstat (limited to 'fs/xfs')
 fs/xfs/libxfs/xfs_alloc.c     |   7
 fs/xfs/libxfs/xfs_alloc.h     |   4
 fs/xfs/libxfs/xfs_attr.c      | 190
 fs/xfs/libxfs/xfs_attr_leaf.c |  40
 fs/xfs/libxfs/xfs_attr_leaf.h |   2
 fs/xfs/libxfs/xfs_bmap.c      | 140
 fs/xfs/libxfs/xfs_da_btree.c  |   5
 fs/xfs/scrub/bmap_repair.c    |   2
 fs/xfs/scrub/ialloc_repair.c  |   4
 fs/xfs/xfs_aops.c             |   4
 fs/xfs/xfs_bmap_util.c        |  10
 fs/xfs/xfs_bmap_util.h        |   2
 fs/xfs/xfs_file.c             | 146
 fs/xfs/xfs_icache.c           |  37
 fs/xfs/xfs_iomap.c            |  67
 fs/xfs/xfs_linux.h            |   2
 fs/xfs/xfs_log.h              |   2
 fs/xfs/xfs_log_cil.c          |  11
 fs/xfs/xfs_log_recover.c      |   2
 fs/xfs/xfs_reflink.c          |   3
 fs/xfs/xfs_reflink.h          |  19
 21 files changed, 352 insertions(+), 347 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 59326f84f6a5..04f64cf9777e 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2766,7 +2766,6 @@ xfs_alloc_commit_autoreap(
xfs_defer_item_unpause(tp, aarp->dfp);
}
-#ifdef DEBUG
/*
* Check if an AGF has a free extent record whose length is equal to
* args->minlen.
@@ -2806,7 +2805,6 @@ out:
return error;
}
-#endif
/*
* Decide whether to use this allocation group for this allocation.
@@ -2880,15 +2878,14 @@ xfs_alloc_fix_freelist(
if (!xfs_alloc_space_available(args, need, alloc_flags))
goto out_agbp_relse;
-#ifdef DEBUG
- if (args->alloc_minlen_only) {
+ if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
int stat;
error = xfs_exact_minlen_extent_available(args, agbp, &stat);
if (error || !stat)
goto out_agbp_relse;
}
-#endif
+
/*
* Make the freelist shorter if it's too long.
*
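The hunk above replaces an #ifdef DEBUG block with an IS_ENABLED(CONFIG_XFS_DEBUG) test, so the debug-only branch is always compiled and type-checked but eliminated as dead code in non-debug builds. A minimal userspace sketch of the preprocessor trick behind IS_ENABLED(), reduced from include/linux/kconfig.h to the defined-as-1 case; illustrative only, not the kernel's full definition:

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_XFS_DEBUG 1	/* comment out to emulate a non-debug build */

int main(void)
{
	/*
	 * Unlike an #ifdef block, the branch is parsed and type-checked in
	 * every configuration, but the compiler drops it entirely when
	 * CONFIG_XFS_DEBUG is not defined.
	 */
	if (IS_ENABLED(CONFIG_XFS_DEBUG))
		puts("debug-only minlen extent check would run here");
	else
		puts("debug check compiled out");
	return 0;
}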
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index fae170825be0..0165452e7cd0 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg {
int datatype; /* mask defining data type treatment */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
+ bool alloc_minlen_only; /* allocate exact minlen extent */
struct xfs_owner_info oinfo; /* owner of blocks being allocated */
enum xfs_ag_resv_type resv; /* block reservation to use */
-#ifdef DEBUG
- bool alloc_minlen_only; /* allocate exact minlen extent */
-#endif
} xfs_alloc_arg_t;
/*
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index f30bcc64100d..c63da14eee04 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -51,7 +51,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/*
* Internal routines when attribute list is more than one block.
@@ -437,6 +436,33 @@ xfs_attr_hashval(
return xfs_attr_hashname(name, namelen);
}
+/* Save the current remote block info and clear the current pointers. */
+static void
+xfs_attr_save_rmt_blk(
+ struct xfs_da_args *args)
+{
+ args->blkno2 = args->blkno;
+ args->index2 = args->index;
+ args->rmtblkno2 = args->rmtblkno;
+ args->rmtblkcnt2 = args->rmtblkcnt;
+ args->rmtvaluelen2 = args->rmtvaluelen;
+ args->rmtblkno = 0;
+ args->rmtblkcnt = 0;
+ args->rmtvaluelen = 0;
+}
+
+/* Set stored info about a remote block */
+static void
+xfs_attr_restore_rmt_blk(
+ struct xfs_da_args *args)
+{
+ args->blkno = args->blkno2;
+ args->index = args->index2;
+ args->rmtblkno = args->rmtblkno2;
+ args->rmtblkcnt = args->rmtblkcnt2;
+ args->rmtvaluelen = args->rmtvaluelen2;
+}
+
/*
* PPTR_REPLACE operations require the caller to set the old and new names and
* values explicitly. Update the canonical fields to the new name and value
@@ -482,48 +508,73 @@ xfs_attr_complete_op(
return replace_state;
}
+/*
+ * Try to add an attribute to an inode in leaf form.
+ */
static int
xfs_attr_leaf_addname(
struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_buf *bp;
int error;
ASSERT(xfs_attr_is_leaf(args->dp));
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
+ if (error)
+ return error;
+
/*
- * Use the leaf buffer we may already hold locked as a result of
- * a sf-to-leaf conversion.
+ * Look up the xattr name to set the insertion point for the new xattr.
*/
- error = xfs_attr_leaf_try_add(args);
-
- if (error == -ENOSPC) {
- error = xfs_attr3_leaf_to_node(args);
- if (error)
- return error;
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ switch (error) {
+ case -ENOATTR:
+ if (args->op_flags & XFS_DA_OP_REPLACE)
+ goto out_brelse;
+ break;
+ case -EEXIST:
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
+ goto out_brelse;
+ trace_xfs_attr_leaf_replace(args);
/*
- * We're not in leaf format anymore, so roll the transaction and
- * retry the add to the newly allocated node block.
+ * Save the existing remote attr state so that the current
+ * values reflect the state of the new attribute we are about to
+ * add, not the attribute we just found and will remove later.
*/
- attr->xattri_dela_state = XFS_DAS_NODE_ADD;
- goto out;
+ xfs_attr_save_rmt_blk(args);
+ break;
+ case 0:
+ break;
+ default:
+ goto out_brelse;
}
- if (error)
- return error;
/*
* We need to commit and roll if we need to allocate remote xattr blocks
* or perform more xattr manipulations. Otherwise there is nothing more
* to do and we can return success.
*/
- if (args->rmtblkno)
+ if (!xfs_attr3_leaf_add(bp, args)) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
+ return error;
+
+ attr->xattri_dela_state = XFS_DAS_NODE_ADD;
+ } else if (args->rmtblkno) {
attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
- else
- attr->xattri_dela_state = xfs_attr_complete_op(attr,
- XFS_DAS_LEAF_REPLACE);
-out:
+ } else {
+ attr->xattri_dela_state =
+ xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE);
+ }
+
trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+ return 0;
+
+out_brelse:
+ xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -546,7 +597,7 @@ xfs_attr_node_addname(
return error;
error = xfs_attr_node_try_addname(attr);
- if (error == -ENOSPC) {
+ if (error == 1) {
error = xfs_attr3_leaf_to_node(args);
if (error)
return error;
@@ -1170,88 +1221,6 @@ xfs_attr_shortform_addname(
* External routines when attribute list is one block
*========================================================================*/
-/* Save the current remote block info and clear the current pointers. */
-static void
-xfs_attr_save_rmt_blk(
- struct xfs_da_args *args)
-{
- args->blkno2 = args->blkno;
- args->index2 = args->index;
- args->rmtblkno2 = args->rmtblkno;
- args->rmtblkcnt2 = args->rmtblkcnt;
- args->rmtvaluelen2 = args->rmtvaluelen;
- args->rmtblkno = 0;
- args->rmtblkcnt = 0;
- args->rmtvaluelen = 0;
-}
-
-/* Set stored info about a remote block */
-static void
-xfs_attr_restore_rmt_blk(
- struct xfs_da_args *args)
-{
- args->blkno = args->blkno2;
- args->index = args->index2;
- args->rmtblkno = args->rmtblkno2;
- args->rmtblkcnt = args->rmtblkcnt2;
- args->rmtvaluelen = args->rmtvaluelen2;
-}
-
-/*
- * Tries to add an attribute to an inode in leaf form
- *
- * This function is meant to execute as part of a delayed operation and leaves
- * the transaction handling to the caller. On success the attribute is added
- * and the inode and transaction are left dirty. If there is not enough space,
- * the attr data is converted to node format and -ENOSPC is returned. Caller is
- * responsible for handling the dirty inode and transaction or adding the attr
- * in node format.
- */
-STATIC int
-xfs_attr_leaf_try_add(
- struct xfs_da_args *args)
-{
- struct xfs_buf *bp;
- int error;
-
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
- if (error)
- return error;
-
- /*
- * Look up the xattr name to set the insertion point for the new xattr.
- */
- error = xfs_attr3_leaf_lookup_int(bp, args);
- switch (error) {
- case -ENOATTR:
- if (args->op_flags & XFS_DA_OP_REPLACE)
- goto out_brelse;
- break;
- case -EEXIST:
- if (!(args->op_flags & XFS_DA_OP_REPLACE))
- goto out_brelse;
-
- trace_xfs_attr_leaf_replace(args);
- /*
- * Save the existing remote attr state so that the current
- * values reflect the state of the new attribute we are about to
- * add, not the attribute we just found and will remove later.
- */
- xfs_attr_save_rmt_blk(args);
- break;
- case 0:
- break;
- default:
- goto out_brelse;
- }
-
- return xfs_attr3_leaf_add(bp, args);
-
-out_brelse:
- xfs_trans_brelse(args->trans, bp);
- return error;
-}
-
/*
* Return EEXIST if attr is found, or ENOATTR if not
*/
@@ -1417,9 +1386,12 @@ error:
/*
* Add a name to a Btree-format attribute list.
*
- * This will involve walking down the Btree, and may involve splitting
- * leaf nodes and even splitting intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
+ * This will involve walking down the Btree, and may involve splitting leaf
+ * nodes and even splitting intermediate nodes up to and including the root
+ * node (a special case of an intermediate node).
+ *
+ * If the tree was still in single leaf format and needs to be converted
+ * to real node format, return 1 and let the caller handle that.
*/
static int
xfs_attr_node_try_addname(
@@ -1427,21 +1399,21 @@ xfs_attr_node_try_addname(
{
struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
- int error;
+ int error = 0;
trace_xfs_attr_node_addname(state->args);
blk = &state->path.blk[state->path.active-1];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
- error = xfs_attr3_leaf_add(blk->bp, state->args);
- if (error == -ENOSPC) {
+ if (!xfs_attr3_leaf_add(blk->bp, state->args)) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
* out-of-line values so it looked like it *might*
* have been a b-tree. Let the caller deal with this.
*/
+ error = 1;
goto out;
}
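The addname paths above stop overloading -ENOSPC and instead adopt a small tri-state convention: a negative errno is a hard failure, 0 means the entry was added, and 1 asks the caller to convert the attr fork to node format and retry. A self-contained sketch of that calling convention, where try_add_demo(), convert_demo() and the fits parameter are hypothetical stand-ins rather than XFS functions:

#include <errno.h>
#include <stdio.h>

/* Pretend leaf-to-node conversion; always succeeds in this sketch. */
static int convert_demo(void)
{
	return 0;
}

/* Returns a negative errno on hard failure, 0 on success, or 1 when the
 * caller must convert the structure to node format and retry. */
static int try_add_demo(int fits)
{
	if (fits < 0)
		return -EIO;	/* hard error: propagate unchanged */
	if (fits == 0)
		return 1;	/* out of room: not an error, retry needed */
	return 0;		/* entry added in place */
}

static int add_demo(int fits)
{
	int error = try_add_demo(fits);

	if (error == 1) {	/* convert format, then retry the add */
		error = convert_demo();
		if (error)
			return error;
		error = try_add_demo(1);
	}
	return error;
}

int main(void)
{
	printf("fits=1 -> %d, fits=0 -> %d, fits=-1 -> %d\n",
	       add_demo(1), add_demo(0), add_demo(-1));
	return 0;
}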
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index e50d913ad32f..fddb55605e0c 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -47,7 +47,7 @@
*/
STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
xfs_dablk_t which_block, struct xfs_buf **bpp);
-STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
+STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
struct xfs_attr3_icleaf_hdr *ichdr,
struct xfs_da_args *args, int freemap_index);
STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
@@ -995,10 +995,8 @@ xfs_attr_shortform_to_leaf(
xfs_attr_sethash(&nargs);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == -ENOATTR);
- error = xfs_attr3_leaf_add(bp, &nargs);
- ASSERT(error != -ENOSPC);
- if (error)
- goto out;
+ if (!xfs_attr3_leaf_add(bp, &nargs))
+ ASSERT(0);
sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
@@ -1333,6 +1331,9 @@ xfs_attr3_leaf_create(
/*
* Split the leaf node, rebalance, then add the new entry.
+ *
+ * Returns 0 if the entry was added, 1 if a further split is needed, or a
+ * negative error number otherwise.
*/
int
xfs_attr3_leaf_split(
@@ -1340,8 +1341,9 @@ xfs_attr3_leaf_split(
struct xfs_da_state_blk *oldblk,
struct xfs_da_state_blk *newblk)
{
- xfs_dablk_t blkno;
- int error;
+ bool added;
+ xfs_dablk_t blkno;
+ int error;
trace_xfs_attr_leaf_split(state->args);
@@ -1376,10 +1378,10 @@ xfs_attr3_leaf_split(
*/
if (state->inleaf) {
trace_xfs_attr_leaf_add_old(state->args);
- error = xfs_attr3_leaf_add(oldblk->bp, state->args);
+ added = xfs_attr3_leaf_add(oldblk->bp, state->args);
} else {
trace_xfs_attr_leaf_add_new(state->args);
- error = xfs_attr3_leaf_add(newblk->bp, state->args);
+ added = xfs_attr3_leaf_add(newblk->bp, state->args);
}
/*
@@ -1387,13 +1389,15 @@ xfs_attr3_leaf_split(
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
- return error;
+ if (!added)
+ return 1;
+ return 0;
}
/*
* Add a name to the leaf attribute list structure.
*/
-int
+bool
xfs_attr3_leaf_add(
struct xfs_buf *bp,
struct xfs_da_args *args)
@@ -1402,6 +1406,7 @@ xfs_attr3_leaf_add(
struct xfs_attr3_icleaf_hdr ichdr;
int tablesize;
int entsize;
+ bool added = true;
int sum;
int tmp;
int i;
@@ -1430,7 +1435,7 @@ xfs_attr3_leaf_add(
if (ichdr.freemap[i].base < ichdr.firstused)
tmp += sizeof(xfs_attr_leaf_entry_t);
if (ichdr.freemap[i].size >= tmp) {
- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
+ xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
goto out_log_hdr;
}
sum += ichdr.freemap[i].size;
@@ -1442,7 +1447,7 @@ xfs_attr3_leaf_add(
* no good and we should just give up.
*/
if (!ichdr.holes && sum < entsize)
- return -ENOSPC;
+ return false;
/*
* Compact the entries to coalesce free space.
@@ -1455,24 +1460,24 @@ xfs_attr3_leaf_add(
* free region, in freemap[0]. If it is not big enough, give up.
*/
if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
- tmp = -ENOSPC;
+ added = false;
goto out_log_hdr;
}
- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
+ xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
out_log_hdr:
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, &leaf->hdr,
xfs_attr3_leaf_hdr_size(leaf)));
- return tmp;
+ return added;
}
/*
* Add a name to a leaf attribute list structure.
*/
-STATIC int
+STATIC void
xfs_attr3_leaf_add_work(
struct xfs_buf *bp,
struct xfs_attr3_icleaf_hdr *ichdr,
@@ -1590,7 +1595,6 @@ xfs_attr3_leaf_add_work(
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
- return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index bac219589896..589f810eedc0 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -76,7 +76,7 @@ int xfs_attr3_leaf_split(struct xfs_da_state *state,
int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
struct xfs_da_args *args);
int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
-int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
+bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8090e8249116..36dd08d13293 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3477,31 +3477,19 @@ xfs_bmap_process_allocated_extent(
xfs_bmap_alloc_account(ap);
}
-#ifdef DEBUG
static int
xfs_bmap_exact_minlen_extent_alloc(
- struct xfs_bmalloca *ap)
+ struct xfs_bmalloca *ap,
+ struct xfs_alloc_arg *args)
{
- struct xfs_mount *mp = ap->ip->i_mount;
- struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
- xfs_fileoff_t orig_offset;
- xfs_extlen_t orig_length;
- int error;
-
- ASSERT(ap->length);
-
if (ap->minlen != 1) {
- ap->blkno = NULLFSBLOCK;
- ap->length = 0;
+ args->fsbno = NULLFSBLOCK;
return 0;
}
- orig_offset = ap->offset;
- orig_length = ap->length;
-
- args.alloc_minlen_only = 1;
-
- xfs_bmap_compute_alignments(ap, &args);
+ args->alloc_minlen_only = 1;
+ args->minlen = args->maxlen = ap->minlen;
+ args->total = ap->total;
/*
* Unlike the longest extent available in an AG, we don't track
@@ -3511,39 +3499,16 @@ xfs_bmap_exact_minlen_extent_alloc(
* we need not be concerned about a drop in performance in
* "debug only" code paths.
*/
- ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
+ ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
- args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
- args.minlen = args.maxlen = ap->minlen;
- args.total = ap->total;
-
- args.alignment = 1;
- args.minalignslop = 0;
-
- args.minleft = ap->minleft;
- args.wasdel = ap->wasdel;
- args.resv = XFS_AG_RESV_NONE;
- args.datatype = ap->datatype;
-
- error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
- if (error)
- return error;
-
- if (args.fsbno != NULLFSBLOCK) {
- xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
- orig_length);
- } else {
- ap->blkno = NULLFSBLOCK;
- ap->length = 0;
- }
-
- return 0;
+ /*
+ * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
+ * iteration and then drops args->total to args->minlen, which might be
+ * required to find an allocation for the transaction reservation when
+ * the file system is very full.
+ */
+ return xfs_bmap_btalloc_low_space(ap, args);
}
-#else
-
-#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
-
-#endif
/*
* If we are not low on available data blocks and we are allocating at
@@ -3801,8 +3766,11 @@ xfs_bmap_btalloc(
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
- if ((ap->datatype & XFS_ALLOC_USERDATA) &&
- xfs_inode_is_filestream(ap->ip))
+ if (unlikely(XFS_TEST_ERROR(false, mp,
+ XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+ error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
+ else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+ xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
else
error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
@@ -4177,43 +4145,6 @@ out:
}
static int
-xfs_bmap_alloc_userdata(
- struct xfs_bmalloca *bma)
-{
- struct xfs_mount *mp = bma->ip->i_mount;
- int whichfork = xfs_bmapi_whichfork(bma->flags);
- int error;
-
- /*
- * Set the data type being allocated. For the data fork, the first data
- * in the file is treated differently to all other allocations. For the
- * attribute fork, we only need to ensure the allocated range is not on
- * the busy list.
- */
- bma->datatype = XFS_ALLOC_NOBUSY;
- if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
- bma->datatype |= XFS_ALLOC_USERDATA;
- if (bma->offset == 0)
- bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
-
- if (mp->m_dalign && bma->length >= mp->m_dalign) {
- error = xfs_bmap_isaeof(bma, whichfork);
- if (error)
- return error;
- }
-
- if (XFS_IS_REALTIME_INODE(bma->ip))
- return xfs_bmap_rtalloc(bma);
- }
-
- if (unlikely(XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
- return xfs_bmap_exact_minlen_extent_alloc(bma);
-
- return xfs_bmap_btalloc(bma);
-}
-
-static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
@@ -4230,15 +4161,32 @@ xfs_bmapi_allocate(
else
bma->minlen = 1;
- if (bma->flags & XFS_BMAPI_METADATA) {
- if (unlikely(XFS_TEST_ERROR(false, mp,
- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
- error = xfs_bmap_exact_minlen_extent_alloc(bma);
- else
- error = xfs_bmap_btalloc(bma);
- } else {
- error = xfs_bmap_alloc_userdata(bma);
+ if (!(bma->flags & XFS_BMAPI_METADATA)) {
+ /*
+ * For the data and COW fork, the first data in the file is
+ * treated differently to all other allocations. For the
+ * attribute fork, we only need to ensure the allocated range
+ * is not on the busy list.
+ */
+ bma->datatype = XFS_ALLOC_NOBUSY;
+ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+ bma->datatype |= XFS_ALLOC_USERDATA;
+ if (bma->offset == 0)
+ bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+
+ if (mp->m_dalign && bma->length >= mp->m_dalign) {
+ error = xfs_bmap_isaeof(bma, whichfork);
+ if (error)
+ return error;
+ }
+ }
}
+
+ if ((bma->datatype & XFS_ALLOC_USERDATA) &&
+ XFS_IS_REALTIME_INODE(bma->ip))
+ error = xfs_bmap_rtalloc(bma);
+ else
+ error = xfs_bmap_btalloc(bma);
if (error)
return error;
if (bma->blkno == NULLFSBLOCK)
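xfs_bmap_btalloc() now gates the exact-minlen allocator behind XFS_TEST_ERROR(), an error-injection tag that evaluates false in normal operation and can be armed by test builds to force the rare path. A standalone sketch of that gating pattern, with inject_minlen, test_error(), alloc_exact_minlen() and alloc_best_length() as hypothetical stand-ins:

#include <stdbool.h>
#include <stdio.h>

/* Stands in for the XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT knob. */
static bool inject_minlen;

/* Like XFS_TEST_ERROR(): the real condition, or the armed injection. */
static bool test_error(bool expr)
{
	return expr || inject_minlen;
}

static int alloc_exact_minlen(void)
{
	puts("forced exact-minlen allocation path");
	return 0;
}

static int alloc_best_length(void)
{
	puts("normal best-length allocation path");
	return 0;
}

int main(int argc, char **argv)
{
	inject_minlen = argc > 1;	/* any argument arms the injection */

	/* Mirrors the dispatch shape in xfs_bmap_btalloc() above. */
	if (test_error(false))
		return alloc_exact_minlen();
	return alloc_best_length();
}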
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 16a529a88780..17d9e6154f19 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -593,9 +593,8 @@ xfs_da3_split(
switch (oldblk->magic) {
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk);
- if ((error != 0) && (error != -ENOSPC)) {
+ if (error < 0)
return error; /* GROT: attr is inconsistent */
- }
if (!error) {
addblk = newblk;
break;
@@ -617,6 +616,8 @@ xfs_da3_split(
error = xfs_attr3_leaf_split(state, newblk,
&state->extrablk);
}
+ if (error == 1)
+ return -ENOSPC;
if (error)
return error; /* GROT: attr inconsistent */
addblk = newblk;
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 49dc38acc66b..4505f4829d53 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -801,7 +801,7 @@ xrep_bmap(
{
struct xrep_bmap *rb;
char *descr;
- unsigned int max_bmbt_recs;
+ xfs_extnum_t max_bmbt_recs;
bool large_extcount;
int error = 0;
diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c
index a00ec7ae1792..c8d2196a04e1 100644
--- a/fs/xfs/scrub/ialloc_repair.c
+++ b/fs/xfs/scrub/ialloc_repair.c
@@ -657,7 +657,7 @@ xrep_ibt_build_new_trees(
* Start by setting up the inobt staging cursor.
*/
fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
- XFS_IBT_BLOCK(sc->mp)),
+ XFS_IBT_BLOCK(sc->mp));
xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
XFS_AG_RESV_NONE);
ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
@@ -678,7 +678,7 @@ xrep_ibt_build_new_trees(
resv = XFS_AG_RESV_NONE;
fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
- XFS_FIBT_BLOCK(sc->mp)),
+ XFS_FIBT_BLOCK(sc->mp));
xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
fsbno, resv);
ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
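The two scrub/ialloc_repair.c hunks replace a stray comma after XFS_AGB_TO_FSB() with a semicolon. The original compiled silently because the comma operator fuses the assignment and the following call into one expression statement; a minimal demo of why no diagnostic is produced:

#include <stdio.h>

int main(void)
{
	int a, b;

	a = 1,		/* comma operator: both sides evaluate, no warning */
	b = 2;		/* still part of the same expression statement */

	/*
	 * Behaves like "a = 1; b = 2;" here, which is why the stray comma
	 * after XFS_AGB_TO_FSB() went unnoticed: the next line simply became
	 * the right-hand operand of the comma expression.
	 */
	printf("a=%d b=%d\n", a, b);
	return 0;
}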
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6dead20338e2..559a3a577097 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -116,7 +116,7 @@ xfs_end_ioend(
if (unlikely(error)) {
if (ioend->io_flags & IOMAP_F_SHARED) {
xfs_reflink_cancel_cow_range(ip, offset, size, true);
- xfs_bmap_punch_delalloc_range(ip, offset,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
offset + size);
}
goto done;
@@ -456,7 +456,7 @@ xfs_discard_folio(
* byte of the next folio. Hence the end offset is only dependent on the
* folio itself and not the start offset that is passed in.
*/
- xfs_bmap_punch_delalloc_range(ip, pos,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
folio_pos(folio) + folio_size(folio));
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 053d567c9108..4719ec90029c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -442,11 +442,12 @@ out_unlock_iolock:
void
xfs_bmap_punch_delalloc_range(
struct xfs_inode *ip,
+ int whichfork,
xfs_off_t start_byte,
xfs_off_t end_byte)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_ifork *ifp = &ip->i_df;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
struct xfs_bmbt_irec got, del;
@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
continue;
}
- xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
}
+ if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+ xfs_inode_clear_cowblocks_tag(ip);
+
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
@@ -580,7 +584,7 @@ xfs_free_eofblocks(
*/
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
if (ip->i_delayed_blks) {
- xfs_bmap_punch_delalloc_range(ip,
+ xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
LLONG_MAX);
}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index eb0895bfb9da..b29760d36e1a 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
}
#endif /* CONFIG_XFS_RT */
-void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
xfs_off_t start_byte, xfs_off_t end_byte);
struct kgetbmap {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 412b1d71b52b..b19916b11fd5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -348,9 +348,82 @@ xfs_file_splice_read(
}
/*
+ * Take care of zeroing post-EOF blocks when they might exist.
+ *
+ * Returns 0 if successful, a negative error on failure, or 1 if this
+ * function dropped the iolock and reacquired it exclusively and the caller
+ * needs to restart the write sanity checks.
+ */
+static ssize_t
+xfs_file_write_zero_eof(
+ struct kiocb *iocb,
+ struct iov_iter *from,
+ unsigned int *iolock,
+ size_t count,
+ bool *drained_dio)
+{
+ struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
+ loff_t isize;
+ int error;
+
+ /*
+ * We need to serialise against EOF updates that occur in IO completions
+ * here. We want to make sure that nobody is changing the size while
+ * we do this check until we have placed an IO barrier (i.e. hold
+ * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
+ * spinlock effectively forms a memory barrier once we have
+ * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
+ * hence be able to correctly determine if we need to run zeroing.
+ */
+ spin_lock(&ip->i_flags_lock);
+ isize = i_size_read(VFS_I(ip));
+ if (iocb->ki_pos <= isize) {
+ spin_unlock(&ip->i_flags_lock);
+ return 0;
+ }
+ spin_unlock(&ip->i_flags_lock);
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ if (!*drained_dio) {
+ /*
+ * If zeroing is needed and we are currently holding the iolock
+ * shared, we need to update it to exclusive which implies
+ * having to redo all checks before.
+ */
+ if (*iolock == XFS_IOLOCK_SHARED) {
+ xfs_iunlock(ip, *iolock);
+ *iolock = XFS_IOLOCK_EXCL;
+ xfs_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
+ }
+
+ /*
+ * We now have an IO submission barrier in place, but AIO can do
+ * EOF updates during IO completion and hence we now need to
+ * wait for all of them to drain. Non-AIO DIO will have drained
+ * before we are given the XFS_IOLOCK_EXCL, and so for most
+ * cases this wait is a no-op.
+ */
+ inode_dio_wait(VFS_I(ip));
+ *drained_dio = true;
+ return 1;
+ }
+
+ trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+
+ return error;
+}
+
+/*
* Common pre-write limit and setup checks.
*
- * Called with the iolocked held either shared and exclusive according to
+ * Called with the iolock held either shared or exclusive according to
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
* if called for a direct write beyond i_size.
*/
@@ -360,13 +433,10 @@ xfs_file_write_checks(
struct iov_iter *from,
unsigned int *iolock)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- ssize_t error = 0;
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
size_t count = iov_iter_count(from);
bool drained_dio = false;
- loff_t isize;
+ ssize_t error;
restart:
error = generic_write_checks(iocb, from);
@@ -389,7 +459,7 @@ restart:
* exclusively.
*/
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
- xfs_iunlock(ip, *iolock);
+ xfs_iunlock(XFS_I(inode), *iolock);
*iolock = XFS_IOLOCK_EXCL;
error = xfs_ilock_iocb(iocb, *iolock);
if (error) {
@@ -400,64 +470,24 @@ restart:
}
/*
- * If the offset is beyond the size of the file, we need to zero any
+ * If the offset is beyond the size of the file, we need to zero all
* blocks that fall between the existing EOF and the start of this
- * write. If zeroing is needed and we are currently holding the iolock
- * shared, we need to update it to exclusive which implies having to
- * redo all checks before.
+ * write.
*
- * We need to serialise against EOF updates that occur in IO completions
- * here. We want to make sure that nobody is changing the size while we
- * do this check until we have placed an IO barrier (i.e. hold the
- * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
- * spinlock effectively forms a memory barrier once we have the
- * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
- * hence be able to correctly determine if we need to run zeroing.
- *
- * We can do an unlocked check here safely as IO completion can only
- * extend EOF. Truncate is locked out at this point, so the EOF can
- * not move backwards, only forwards. Hence we only need to take the
- * slow path and spin locks when we are at or beyond the current EOF.
+ * We can do an unlocked check for i_size here safely as I/O completion
+ * can only extend EOF. Truncate is locked out at this point, so the
+ * EOF can not move backwards, only forwards. Hence we only need to take
+ * the slow path when we are at or beyond the current EOF.
*/
- if (iocb->ki_pos <= i_size_read(inode))
- goto out;
-
- spin_lock(&ip->i_flags_lock);
- isize = i_size_read(inode);
- if (iocb->ki_pos > isize) {
- spin_unlock(&ip->i_flags_lock);
-
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
-
- if (!drained_dio) {
- if (*iolock == XFS_IOLOCK_SHARED) {
- xfs_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_ilock(ip, *iolock);
- iov_iter_reexpand(from, count);
- }
- /*
- * We now have an IO submission barrier in place, but
- * AIO can do EOF updates during IO completion and hence
- * we now need to wait for all of them to drain. Non-AIO
- * DIO will have drained before we are given the
- * XFS_IOLOCK_EXCL, and so for most cases this wait is a
- * no-op.
- */
- inode_dio_wait(inode);
- drained_dio = true;
+ if (iocb->ki_pos > i_size_read(inode)) {
+ error = xfs_file_write_zero_eof(iocb, from, iolock, count,
+ &drained_dio);
+ if (error == 1)
goto restart;
- }
-
- trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
- error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
if (error)
return error;
- } else
- spin_unlock(&ip->i_flags_lock);
+ }
-out:
return kiocb_modified(iocb);
}
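The refactor above moves EOF zeroing into xfs_file_write_zero_eof(), which returns 1 after upgrading the iolock from shared to exclusive, forcing the caller to redo all write checks under the stronger lock. A standalone sketch of that drop-upgrade-restart pattern, using a pthread rwlock in place of the XFS iolock; maybe_zero_eof() and write_checks() are illustrative stand-ins:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

enum lock_mode { LOCK_SHARED, LOCK_EXCL };

/* Returns 0 when done, or 1 after upgrading the lock, meaning the checks
 * performed under the shared lock are stale and must be redone. */
static int maybe_zero_eof(enum lock_mode *mode, bool *drained)
{
	if (!*drained) {
		if (*mode == LOCK_SHARED) {
			pthread_rwlock_unlock(&iolock);	/* drop shared... */
			pthread_rwlock_wrlock(&iolock);	/* ...take exclusive */
			*mode = LOCK_EXCL;
		}
		*drained = true;	/* e.g. inode_dio_wait() completed */
		return 1;		/* restart the write checks */
	}
	puts("zeroing post-EOF blocks under the exclusive lock");
	return 0;
}

static int write_checks(void)
{
	enum lock_mode mode = LOCK_SHARED;
	bool drained = false;
	int error;

	pthread_rwlock_rdlock(&iolock);
restart:
	/* generic size/permission checks would be redone here */
	error = maybe_zero_eof(&mode, &drained);
	if (error == 1)
		goto restart;
	pthread_rwlock_unlock(&iolock);
	return error;
}

int main(void)
{
	return write_checks();
}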
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index a680e5b82672..6b119a7a324f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1280,14 +1280,17 @@ xfs_inode_clear_eofblocks_tag(
}
/*
- * Set ourselves up to free CoW blocks from this file. If it's already clean
- * then we can bail out quickly, but otherwise we must back off if the file
- * is undergoing some kind of write.
+ * Prepare to free COW fork blocks from an inode.
*/
static bool
xfs_prep_free_cowblocks(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ struct xfs_icwalk *icw)
{
+ bool sync;
+
+ sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
+
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1299,16 +1302,22 @@ xfs_prep_free_cowblocks(
}
/*
- * If the mapping is dirty or under writeback we cannot touch the
- * CoW fork. Leave it alone if we're in the midst of a directio.
+ * A cowblocks trim of an inode can have a significant effect on
+ * fragmentation even when a reasonable COW extent size hint is set.
+ * Therefore, we prefer to not process cowblocks unless they are clean
+ * and idle. We can never process a cowblocks inode that is dirty or has
+ * in-flight I/O under any circumstances, because outstanding writeback
+ * or dio expects targeted COW fork blocks to exist through write
+ * completion where they can be remapped into the data fork.
+ *
+ * Therefore, the heuristic used here is to never process inodes
+ * currently opened for write from background (i.e. non-sync) scans. For
+ * sync scans, use the pagecache/dio state of the inode to ensure we
+ * never free COW fork blocks out from under pending I/O.
*/
- if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
- atomic_read(&VFS_I(ip)->i_dio_count))
+ if (!sync && inode_is_open_for_write(VFS_I(ip)))
return false;
-
- return true;
+ return xfs_can_free_cowblocks(ip);
}
/*
@@ -1337,7 +1346,7 @@ xfs_inode_free_cowblocks(
if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
return 0;
- if (!xfs_prep_free_cowblocks(ip))
+ if (!xfs_prep_free_cowblocks(ip, icw))
return 0;
if (!xfs_icwalk_match(ip, icw))
@@ -1366,7 +1375,7 @@ xfs_inode_free_cowblocks(
* Check again, nobody else should be able to dirty blocks or change
* the reflink iflag now that we have the first two locks held.
*/
- if (xfs_prep_free_cowblocks(ip))
+ if (xfs_prep_free_cowblocks(ip, icw))
ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
return ret;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1e11f48814c0..916531d9f83c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -975,6 +975,7 @@ xfs_buffered_write_iomap_begin(
int allocfork = XFS_DATA_FORK;
int error = 0;
unsigned int lockmode = XFS_ILOCK_EXCL;
+ unsigned int iomap_flags = 0;
u64 seq;
if (xfs_is_shutdown(mp))
@@ -1145,6 +1146,11 @@ xfs_buffered_write_iomap_begin(
}
}
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap_flags |= IOMAP_F_NEW;
if (allocfork == XFS_COW_FORK) {
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
end_fsb - offset_fsb, prealloc_blocks, &cmap,
@@ -1162,19 +1168,11 @@ xfs_buffered_write_iomap_begin(
if (error)
goto out_unlock;
- /*
- * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
- * them out if the write happens to fail.
- */
- seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
- xfs_iunlock(ip, lockmode);
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
-
found_imap:
- seq = xfs_iomap_inode_sequence(ip, 0);
+ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
convert_delay:
xfs_iunlock(ip, lockmode);
@@ -1188,20 +1186,20 @@ convert_delay:
return 0;
found_cow:
- seq = xfs_iomap_inode_sequence(ip, 0);
if (imap.br_startoff <= offset_fsb) {
- error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
+ xfs_iomap_inode_sequence(ip, 0));
if (error)
goto out_unlock;
- seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
- xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
- IOMAP_F_SHARED, seq);
+ } else {
+ xfs_trim_extent(&cmap, offset_fsb,
+ imap.br_startoff - offset_fsb);
}
- xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
+ iomap_flags |= IOMAP_F_SHARED;
+ seq = xfs_iomap_inode_sequence(ip, iomap_flags);
xfs_iunlock(ip, lockmode);
- return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, iomap_flags, seq);
out_unlock:
xfs_iunlock(ip, lockmode);
@@ -1215,7 +1213,10 @@ xfs_buffered_write_delalloc_punch(
loff_t length,
struct iomap *iomap)
{
- xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length);
+ xfs_bmap_punch_delalloc_range(XFS_I(inode),
+ (iomap->flags & IOMAP_F_SHARED) ?
+ XFS_COW_FORK : XFS_DATA_FORK,
+ offset, offset + length);
}
static int
@@ -1227,8 +1228,30 @@ xfs_buffered_write_iomap_end(
unsigned flags,
struct iomap *iomap)
{
- iomap_file_buffered_write_punch_delalloc(inode, offset, length, written,
- flags, iomap, &xfs_buffered_write_delalloc_punch);
+ loff_t start_byte, end_byte;
+
+ /* If we didn't reserve the blocks, we're not allowed to punch them. */
+ if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
+ return 0;
+
+ /* Nothing to do if we've written the entire delalloc extent */
+ start_byte = iomap_last_written_block(inode, offset, written);
+ end_byte = round_up(offset + length, i_blocksize(inode));
+ if (start_byte >= end_byte)
+ return 0;
+
+ /* For zeroing operations the callers already hold invalidate_lock. */
+ if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
+ rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
+ iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+ iomap, xfs_buffered_write_delalloc_punch);
+ } else {
+ filemap_invalidate_lock(inode->i_mapping);
+ iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
+ iomap, xfs_buffered_write_delalloc_punch);
+ filemap_invalidate_unlock(inode->i_mapping);
+ }
+
return 0;
}
@@ -1435,6 +1458,8 @@ xfs_zero_range(
{
struct inode *inode = VFS_I(ip);
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
&xfs_dax_write_iomap_ops);
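In the new xfs_buffered_write_iomap_end(), zeroing and unsharing callers already hold the mapping's invalidate_lock, so only the plain buffered-write path takes it before punching out unused delalloc blocks. A sketch of that "lock unless the caller already holds it" convention, using a pthread rwlock; write_end() and do_punch() are hypothetical stand-ins:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t invalidate_lock = PTHREAD_RWLOCK_INITIALIZER;

static void do_punch(void)
{
	puts("punching out unused delalloc blocks");
}

static void write_end(bool caller_holds_lock)
{
	if (caller_holds_lock) {
		/* zeroing/unsharing path: lock already held, just punch */
		do_punch();
	} else {
		/* plain buffered write: take the lock around the punch */
		pthread_rwlock_wrlock(&invalidate_lock);
		do_punch();
		pthread_rwlock_unlock(&invalidate_lock);
	}
}

int main(void)
{
	write_end(false);			/* normal write completion */

	pthread_rwlock_wrlock(&invalidate_lock);
	write_end(true);			/* IOMAP_ZERO-style caller */
	pthread_rwlock_unlock(&invalidate_lock);
	return 0;
}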
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 54a098fe7285..9a2221b4aa21 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -69,7 +69,7 @@ typedef __u32 xfs_nlink_t;
#include <asm/param.h>
#include <linux/uaccess.h>
#include <asm/byteorder.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "xfs_fs.h"
#include "xfs_stats.h"
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 67c539cc9305..13455854365f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -158,6 +158,4 @@ bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags);
-int xfs_attr_use_log_assist(struct xfs_mount *mp);
-
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 391a938d690c..80da0cf87d7a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -156,7 +156,6 @@ xlog_cil_insert_pcp_aggregate(
struct xfs_cil *cil,
struct xfs_cil_ctx *ctx)
{
- struct xlog_cil_pcp *cilpcp;
int cpu;
int count = 0;
@@ -171,13 +170,11 @@ xlog_cil_insert_pcp_aggregate(
* structures that could have a nonzero space_used.
*/
for_each_cpu(cpu, &ctx->cil_pcpmask) {
- int old, prev;
+ struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+ int old = READ_ONCE(cilpcp->space_used);
- cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
- do {
- old = cilpcp->space_used;
- prev = cmpxchg(&cilpcp->space_used, old, 0);
- } while (old != prev);
+ while (!try_cmpxchg(&cilpcp->space_used, &old, 0))
+ ;
count += old;
}
atomic_add(count, &ctx->space_used);
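The xfs_log_cil.c hunk converts an open-coded cmpxchg() retry loop into try_cmpxchg(), which on failure writes the freshly observed value back into "old", so the loop body needs no separate reload. A userspace equivalent using the C11 atomic that kernel try_cmpxchg() models:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int space_used = 42;	/* stands in for cilpcp->space_used */

int main(void)
{
	int old = atomic_load(&space_used);

	/*
	 * Equivalent of: while (!try_cmpxchg(&space_used, &old, 0)) ;
	 * On failure the builtin stores the current value back into "old",
	 * so we always retry with fresh state.
	 */
	while (!atomic_compare_exchange_weak(&space_used, &old, 0))
		;

	printf("drained %d bytes from the per-cpu counter\n", old);
	return 0;
}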
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ec766b4bc853..a13bf53fea49 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1849,7 +1849,7 @@ xlog_find_item_ops(
* from the transaction. However, we can't do that until after we've
* replayed all the other items because they may be dependent on the
* cancelled buffer and replaying the cancelled buffer can remove it
- * form the cancelled buffer table. Hence they have tobe done last.
+ * from the cancelled buffer table. Hence they have to be done last.
*
* 3. Inode allocation buffers must be replayed before inode items that
* read the buffer and replay changes into it. For filesystems using the
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 6fde6ec8092f..5bf6682e701b 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1595,6 +1595,9 @@ xfs_reflink_clear_inode_flag(
ASSERT(xfs_is_reflink_inode(ip));
+ if (!xfs_can_free_cowblocks(ip))
+ return 0;
+
error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
if (error || needs_flag)
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index fb55e4ce49fa..4a58e4533671 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -6,6 +6,25 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
+/*
+ * Check whether it is safe to free COW fork blocks from an inode. It is unsafe
+ * to do so when an inode has dirty cache or I/O in-flight, even if no shared
+ * extents exist in the data fork, because outstanding I/O may target blocks
+ * that were speculatively allocated to the COW fork.
+ */
+static inline bool
+xfs_can_free_cowblocks(struct xfs_inode *ip)
+{
+ struct inode *inode = VFS_I(ip);
+
+ if ((inode->i_state & I_DIRTY_PAGES) ||
+ mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+ atomic_read(&inode->i_dio_count))
+ return false;
+ return true;
+}
+
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared);
int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,