diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 108 | 
1 file changed, 85 insertions, 23 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3c2ae0e4f25a..a5ce99a6c936 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -205,14 +205,11 @@ static int join_running_log_trans(struct btrfs_root *root)   * until you call btrfs_end_log_trans() or it makes any future   * log transactions wait until you call btrfs_end_log_trans()   */ -int btrfs_pin_log_trans(struct btrfs_root *root) +void btrfs_pin_log_trans(struct btrfs_root *root)  { -	int ret = -ENOENT; -  	mutex_lock(&root->log_mutex);  	atomic_inc(&root->log_writers);  	mutex_unlock(&root->log_mutex); -	return ret;  }  /* @@ -258,6 +255,13 @@ struct walk_control {  	/* what stage of the replay code we're currently in */  	int stage; +	/* +	 * Ignore any items from the inode currently being processed. Needs +	 * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in +	 * the LOG_WALK_REPLAY_INODES stage. +	 */ +	bool ignore_cur_inode; +  	/* the root we are currently replaying */  	struct btrfs_root *replay_dest; @@ -2487,6 +2491,20 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  			inode_item = btrfs_item_ptr(eb, i,  					    struct btrfs_inode_item); +			/* +			 * If we have a tmpfile (O_TMPFILE) that got fsync'ed +			 * and never got linked before the fsync, skip it, as +			 * replaying it is pointless since it would be deleted +			 * later. We skip logging tmpfiles, but it's always +			 * possible we are replaying a log created with a kernel +			 * that used to log tmpfiles. 
+			 */ +			if (btrfs_inode_nlink(eb, inode_item) == 0) { +				wc->ignore_cur_inode = true; +				continue; +			} else { +				wc->ignore_cur_inode = false; +			}  			ret = replay_xattr_deletes(wc->trans, root, log,  						   path, key.objectid);  			if (ret) @@ -2524,16 +2542,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  					     root->fs_info->sectorsize);  				ret = btrfs_drop_extents(wc->trans, root, inode,  							 from, (u64)-1, 1); -				/* -				 * If the nlink count is zero here, the iput -				 * will free the inode.  We bump it to make -				 * sure it doesn't get freed until the link -				 * count fixup is done. -				 */  				if (!ret) { -					if (inode->i_nlink == 0) -						inc_nlink(inode); -					/* Update link count and nbytes. */ +					/* Update the inode's nbytes. */  					ret = btrfs_update_inode(wc->trans,  								 root, inode);  				} @@ -2548,6 +2558,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,  				break;  		} +		if (wc->ignore_cur_inode) +			continue; +  		if (key.type == BTRFS_DIR_INDEX_KEY &&  		    wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {  			ret = replay_one_dir_item(wc->trans, root, path, @@ -3196,9 +3209,12 @@ static void free_log_tree(struct btrfs_trans_handle *trans,  	};  	ret = walk_log_tree(trans, log, &wc); -	/* I don't think this can happen but just in case */ -	if (ret) -		btrfs_abort_transaction(trans, ret); +	if (ret) { +		if (trans) +			btrfs_abort_transaction(trans, ret); +		else +			btrfs_handle_fs_error(log->fs_info, ret, NULL); +	}  	while (1) {  		ret = find_first_extent_bit(&log->dirty_log_pages, @@ -4374,13 +4390,29 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,  	INIT_LIST_HEAD(&extents); -	down_write(&inode->dio_sem);  	write_lock(&tree->lock);  	test_gen = root->fs_info->last_trans_committed;  	logged_start = start;  	logged_end = end;  	list_for_each_entry_safe(em, n, &tree->modified_extents, list) { +		/* +		 * 
Skip extents outside our logging range. It's important to do +		 * it for correctness because if we don't ignore them, we may +		 * log them before their ordered extent completes, and therefore +		 * we could log them without logging their respective checksums +		 * (the checksum items are added to the csum tree at the very +		 * end of btrfs_finish_ordered_io()). Also leave such extents +		 * outside of our range in the list, since we may have another +		 * ranged fsync in the near future that needs them. If an extent +		 * outside our range corresponds to a hole, log it to avoid +		 * leaving gaps between extents (fsck will complain when we are +		 * not using the NO_HOLES feature). +		 */ +		if ((em->start > end || em->start + em->len <= start) && +		    em->block_start != EXTENT_MAP_HOLE) +			continue; +  		list_del_init(&em->list);  		/*  		 * Just an arbitrary number, this can be really CPU intensive @@ -4440,7 +4472,6 @@ process:  	}  	WARN_ON(!list_empty(&extents));  	write_unlock(&tree->lock); -	up_write(&inode->dio_sem);  	btrfs_release_path(path);  	if (!ret) @@ -4636,7 +4667,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,  			ASSERT(len == i_size ||  			       (len == fs_info->sectorsize &&  				btrfs_file_extent_compression(leaf, extent) != -				BTRFS_COMPRESS_NONE)); +				BTRFS_COMPRESS_NONE) || +			       (len < i_size && i_size < fs_info->sectorsize));  			return 0;  		} @@ -5564,9 +5596,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,  			dir_inode = btrfs_iget(fs_info->sb, &inode_key,  					       root, NULL); -			/* If parent inode was deleted, skip it. */ -			if (IS_ERR(dir_inode)) -				continue; +			/* +			 * If the parent inode was deleted, return an error to +			 * fallback to a transaction commit. 
This is to prevent +			 * getting an inode that was moved from one parent A to +			 * a parent B, got its former parent A deleted and then +			 * it got fsync'ed, from existing at both parents after +			 * a log replay (and the old parent still existing). +			 * Example: +			 * +			 * mkdir /mnt/A +			 * mkdir /mnt/B +			 * touch /mnt/B/bar +			 * sync +			 * mv /mnt/B/bar /mnt/A/bar +			 * mv -T /mnt/A /mnt/B +			 * fsync /mnt/B/bar +			 * <power fail> +			 * +			 * If we ignore the old parent B which got deleted, +			 * after a log replay we would have file bar linked +			 * at both parents and the old parent B would still +			 * exist. +			 */ +			if (IS_ERR(dir_inode)) { +				ret = PTR_ERR(dir_inode); +				goto out; +			}  			if (ctx)  				ctx->log_new_dentries = false; @@ -5640,7 +5696,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,  	if (ret)  		goto end_no_trans; -	if (btrfs_inode_in_log(inode, trans->transid)) { +	/* +	 * Skip already logged inodes or inodes corresponding to tmpfiles +	 * (since logging them is pointless, a link count of 0 means they +	 * will never be accessible). +	 */ +	if (btrfs_inode_in_log(inode, trans->transid) || +	    inode->vfs_inode.i_nlink == 0) {  		ret = BTRFS_NO_LOG_SYNC;  		goto end_no_trans;  	}  | 
