diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 226 | 
1 files changed, 221 insertions, 5 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,  	return 0;  } +/* + * At the moment we always log all xattrs. This is to figure out at log replay + * time which xattrs must have their deletion replayed. If an xattr is missing + * in the log tree and exists in the fs/subvol tree, we delete it. This is + * because if an xattr is deleted, the inode is fsynced and a power failure + * happens, causing the log to be replayed the next time the fs is mounted, + * we want the xattr to not exist anymore (same behaviour as other filesystems + * with a journal, ext3/4, xfs, f2fs, etc). + * + * Starting from the first possible xattr key of the inode, walk the items of + * @root and hand every run of consecutive xattr items found in a leaf to + * copy_items(). A pending run is flushed before moving on to the next leaf + * and once more after the loop ends. This function does not itself call + * btrfs_release_path() on @path or @dst_path. + * + * Returns 0 on success, or the negative value returned by + * btrfs_search_slot(), copy_items() or btrfs_next_leaf() on failure. + */ +static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, +				struct btrfs_root *root, +				struct inode *inode, +				struct btrfs_path *path, +				struct btrfs_path *dst_path) +{ +	int ret; +	struct btrfs_key key; +	const u64 ino = btrfs_ino(inode); +	int ins_nr = 0; +	int start_slot = 0; + +	key.objectid = ino; +	key.type = BTRFS_XATTR_ITEM_KEY; +	key.offset = 0; + +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (ret < 0) +		return ret; + +	while (true) { +		int slot = path->slots[0]; +		struct extent_buffer *leaf = path->nodes[0]; +		int nritems = btrfs_header_nritems(leaf); + +		if (slot >= nritems) { +			if (ins_nr > 0) { +				u64 last_extent = 0; + +				ret = copy_items(trans, inode, dst_path, path, +						 &last_extent, start_slot, +						 ins_nr, 1, 0); +				/* can't be 1, extent items aren't processed */ +				ASSERT(ret <= 0); +				if (ret < 0) +					return ret; +				ins_nr = 0; +			} +			ret = btrfs_next_leaf(root, path); +			if (ret < 0) +				return ret; +			else if (ret > 0) +				break; +			continue; +		} + +		btrfs_item_key_to_cpu(leaf, &key, slot); +		if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) +			break; + +		if (ins_nr == 0) +			start_slot = 
slot; +		ins_nr++; +		path->slots[0]++; +		cond_resched(); +	} +	if (ins_nr > 0) { +		u64 last_extent = 0; + +		ret = copy_items(trans, inode, dst_path, path, +				 &last_extent, start_slot, +				 ins_nr, 1, 0); +		/* can't be 1, extent items aren't processed */ +		ASSERT(ret <= 0); +		if (ret < 0) +			return ret; +	} + +	return 0; +} + +/* + * If the no holes feature is enabled we need to make sure any hole between the + * last extent and the i_size of our inode is explicitly marked in the log. This + * is to make sure that doing something like: + * + *      1) create file with 128Kb of data + *      2) truncate file to 64Kb + *      3) truncate file to 256Kb + *      4) fsync file + *      5) <crash/power failure> + *      6) mount fs and trigger log replay + * + * Will give us a file with a size of 256Kb, the first 64Kb of data match what + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the + * file correspond to a hole. The presence of explicit holes in a log tree is + * what guarantees that log replay will remove/adjust file extent items in the + * fs/subvol tree. + * + * Here we do not need to care about holes between extents, that is already done + * by copy_items(). We also only need to do this in the full sync path, where we + * look up extents from the fs/subvol tree only. In the fast path case, we + * look up the list of modified extent maps and if any represents a hole, we + * insert a corresponding extent representing a hole in the log tree. + * + * Does nothing and returns 0 unless the NO_HOLES incompat feature is set. + * Otherwise the last file extent item of the inode is looked up in @root and + * the range from its end (or from offset 0 if the inode has no file extent + * items) up to i_size, rounded up to root->sectorsize, is inserted as a file + * extent item into root->log_root. + * + * Returns 0 on success or when no hole needs to be logged, or the negative + * value returned by btrfs_search_slot() or btrfs_insert_file_extent(). 
+ */ +static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, +				   struct btrfs_root *root, +				   struct inode *inode, +				   struct btrfs_path *path) +{ +	int ret; +	struct btrfs_key key; +	u64 hole_start; +	u64 hole_size; +	struct extent_buffer *leaf; +	struct btrfs_root *log = root->log_root; +	const u64 ino = btrfs_ino(inode); +	const u64 i_size = i_size_read(inode); + +	if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) +		return 0; + +	key.objectid = ino; +	key.type = BTRFS_EXTENT_DATA_KEY; +	key.offset = (u64)-1; + +	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	ASSERT(ret != 0); +	if (ret < 0) +		return ret; + +	ASSERT(path->slots[0] > 0); +	path->slots[0]--; +	leaf = path->nodes[0]; +	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + +	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { +		/* inode does not have any extents, the hole spans from offset 0 to i_size */ +		hole_start = 0; +		hole_size = i_size; +	} else { +		struct btrfs_file_extent_item *extent; +		u64 len; + +		/* +		 * If there's an extent beyond i_size, an explicit hole was +		 * already inserted by copy_items(). +		 */ +		if (key.offset >= i_size) +			return 0; + +		extent = btrfs_item_ptr(leaf, path->slots[0], +					struct btrfs_file_extent_item); + +		if (btrfs_file_extent_type(leaf, extent) == +		    BTRFS_FILE_EXTENT_INLINE) { +			len = btrfs_file_extent_inline_len(leaf, +							   path->slots[0], +							   extent); +			ASSERT(len == i_size); +			return 0; +		} + +		len = btrfs_file_extent_num_bytes(leaf, extent); +		/* Last extent goes beyond i_size, no need to log a hole. */ +		if (key.offset + len > i_size) +			return 0; +		hole_start = key.offset + len; +		hole_size = i_size - hole_start; +	} +	btrfs_release_path(path); + +	/* Last extent ends at i_size. 
*/ +	if (hole_size == 0) +		return 0; + +	hole_size = ALIGN(hole_size, root->sectorsize); +	ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, +				       hole_size, 0, hole_size, 0, 0, 0); +	return ret; +} +  /* log a single inode in the tree log.   * At least one parent directory for this inode must exist in the tree   * or be logged already. @@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,  	u64 ino = btrfs_ino(inode);  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;  	u64 logged_isize = 0; +	bool need_log_inode_item = true;  	path = btrfs_alloc_path();  	if (!path) @@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,  		} else {  			if (inode_only == LOG_INODE_ALL)  				fast_search = true; -			ret = log_inode_item(trans, log, dst_path, inode); -			if (ret) { -				err = ret; -				goto out_unlock; -			}  			goto log_extents;  		} @@ -4290,6 +4467,28 @@ again:  		if (min_key.type > max_key.type)  			break; +		if (min_key.type == BTRFS_INODE_ITEM_KEY) +			need_log_inode_item = false; + +		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ +		if (min_key.type == BTRFS_XATTR_ITEM_KEY) { +			if (ins_nr == 0) +				goto next_slot; +			ret = copy_items(trans, inode, dst_path, path, +					 &last_extent, ins_start_slot, +					 ins_nr, inode_only, logged_isize); +			if (ret < 0) { +				err = ret; +				goto out_unlock; +			} +			ins_nr = 0; +			if (ret) { +				btrfs_release_path(path); +				continue; +			} +			goto next_slot; +		} +  		src = path->nodes[0];  		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {  			ins_nr++; @@ -4357,9 +4556,26 @@ next_slot:  		ins_nr = 0;  	} +	btrfs_release_path(path); +	btrfs_release_path(dst_path); +	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); +	if (err) +		goto out_unlock; +	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { +		btrfs_release_path(path); +		btrfs_release_path(dst_path); +		
err = btrfs_log_trailing_hole(trans, root, inode, path); +		if (err) +			goto out_unlock; +	}  log_extents:  	btrfs_release_path(path);  	btrfs_release_path(dst_path); +	if (need_log_inode_item) { +		err = log_inode_item(trans, log, dst_path, inode); +		if (err) +			goto out_unlock; +	}  	if (fast_search) {  		/*  		 * Some ordered extents started by fsync might have completed  | 
