xfs: remote attribute blocks aren't really userdata

When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptible due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, This patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
author: Dave Chinner <dchinner@redhat.com> 2016-09-26 01:21:28 +0300
committer: Dave Chinner <david@fromorbit.com> 2016-09-26 01:21:28 +0300
commit: 292378edcb408c652e841fdc867fc14f8b4995fa (patch)
tree: 7a7c1961c4083c311f4ff7cf0bede090e72223f2 /fs/xfs/libxfs/xfs_alloc.c
parent: ea78d80866ce375defb2fdd1c8a3aafec95e0f85 (diff)
download: linux-292378edcb408c652e841fdc867fc14f8b4995fa.tar.xz
1 files changed, 12 insertions, 11 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 05b5243d89f6..1d530c253c0e 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -265,7 +265,7 @@ xfs_alloc_compute_diff(
 	xfs_agblock_t	wantbno,	/* target starting block */
 	xfs_extlen_t	wantlen,	/* target length */
 	xfs_extlen_t	alignment,	/* target alignment */
-	char		userdata,	/* are we allocating data? */
+	int		datatype,	/* are we allocating data? */
 	xfs_agblock_t	freebno,	/* freespace's starting block */
 	xfs_extlen_t	freelen,	/* freespace's length */
 	xfs_agblock_t	*newbnop)	/* result: best start block from free */
@@ -276,6 +276,7 @@ xfs_alloc_compute_diff(
 	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
 	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
 	xfs_agblock_t	wantend;	/* end of target extent */
+	bool		userdata = xfs_alloc_is_userdata(datatype);
 
 	ASSERT(freelen >= wantlen);
 	freeend = freebno + freelen;
@@ -917,7 +918,7 @@ xfs_alloc_find_best_extent(
 
 			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 						       args->alignment,
-						       args->userdata, *sbnoa,
+						       args->datatype, *sbnoa,
 						       *slena, &new);
 
 			/*
@@ -1101,7 +1102,7 @@ restart:
 			if (args->len < blen)
 				continue;
 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, args->userdata, ltbnoa,
+				args->alignment, args->datatype, ltbnoa,
 				ltlena, &ltnew);
 			if (ltnew != NULLAGBLOCK &&
 			    (args->len > blen || ltdiff < bdiff)) {
@@ -1254,7 +1255,7 @@ restart:
 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 			xfs_alloc_fix_len(args);
 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, args->userdata, ltbnoa,
+				args->alignment, args->datatype, ltbnoa,
 				ltlena, &ltnew);
 
 			error = xfs_alloc_find_best_extent(args,
@@ -1271,7 +1272,7 @@ restart:
 			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
 			xfs_alloc_fix_len(args);
 			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, args->userdata, gtbnoa,
+				args->alignment, args->datatype, gtbnoa,
 				gtlena, &gtnew);
 
 			error = xfs_alloc_find_best_extent(args,
@@ -1331,7 +1332,7 @@ restart:
 	}
 	rlen = args->len;
 	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
-				     args->userdata, ltbnoa, ltlena, &ltnew);
+				     args->datatype, ltbnoa, ltlena, &ltnew);
 	ASSERT(ltnew >= ltbno);
 	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
 	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
@@ -1608,9 +1609,9 @@ xfs_alloc_ag_vextent_small(
 			goto error0;
 		if (fbno != NULLAGBLOCK) {
 			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
-					     args->userdata);
+			      xfs_alloc_allow_busy_reuse(args->datatype));
 
-			if (args->userdata) {
+			if (xfs_alloc_is_userdata(args->datatype)) {
 				xfs_buf_t	*bp;
 
 				bp = xfs_btree_get_bufs(args->mp, args->tp,
@@ -2058,7 +2059,7 @@ xfs_alloc_fix_freelist(
 	 * somewhere else if we are not being asked to try harder at this
 	 * point
 	 */
-	if (pag->pagf_metadata && args->userdata &&
+	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
 	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
 		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
 		goto out_agbp_relse;
@@ -2633,7 +2634,7 @@ xfs_alloc_vextent(
 		 * Try near allocation first, then anywhere-in-ag after
 		 * the first a.g. fails.
 		 */
-		if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
+		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
 		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
 			args->fsbno = XFS_AGB_TO_FSB(mp,
 					((mp->m_agfrotor / rotorstep) %
@@ -2766,7 +2767,7 @@ xfs_alloc_vextent(
 #endif
 
 		/* Zero the extent if we were asked to do so */
-		if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
+		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
 			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
 			if (error)
 				goto error0;
author	Dave Chinner <dchinner@redhat.com>	2016-09-26 01:21:28 +0300
committer	Dave Chinner <david@fromorbit.com>	2016-09-26 01:21:28 +0300
commit	292378edcb408c652e841fdc867fc14f8b4995fa (patch)
tree	7a7c1961c4083c311f4ff7cf0bede090e72223f2 /fs/xfs/libxfs/xfs_alloc.c
parent	ea78d80866ce375defb2fdd1c8a3aafec95e0f85 (diff)
download	linux-292378edcb408c652e841fdc867fc14f8b4995fa.tar.xz