diff options
| author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2016-03-29 16:33:47 +0300 | 
|---|---|---|
| committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2016-03-29 16:33:47 +0300 | 
| commit | 8041dcc881c928134c546ae85e6e59e65804357c (patch) | |
| tree | be5d1c21af8cf38ac32ed8708396881aabd44d4e /fs/btrfs/tree-log.c | |
| parent | ed6069be7204541c1da532ad8bbf892e34513552 (diff) | |
| parent | f55532a0c0b8bb6148f4e07853b876ef73bc69ca (diff) | |
| download | linux-8041dcc881c928134c546ae85e6e59e65804357c.tar.xz | |
Merge tag 'v4.6-rc1' into for-linus-4.6
Linux 4.6-rc1
* tag 'v4.6-rc1': (12823 commits)
  Linux 4.6-rc1
  f2fs/crypto: fix xts_tweak initialization
  NTB: Remove _addr functions from ntb_hw_amd
  orangefs: fix orangefs_superblock locking
  orangefs: fix do_readv_writev() handling of error halfway through
  orangefs: have ->kill_sb() evict the VFS side of things first
  orangefs: sanitize ->llseek()
  orangefs-bufmap.h: trim unused junk
  orangefs: saner calling conventions for getting a slot
  orangefs_copy_{to,from}_bufmap(): don't pass bufmap pointer
  orangefs: get rid of readdir_handle_s
  thp: fix typo in khugepaged_scan_pmd()
  MAINTAINERS: fill entries for KASAN
  mm/filemap: generic_file_read_iter(): check for zero reads unconditionally
  kasan: test fix: warn if the UAF could not be detected in kmalloc_uaf2
  mm, kasan: stackdepot implementation. Enable stackdepot for SLAB
  arch, ftrace: for KASAN put hard/soft IRQ entries into separate sections
  mm, kasan: add GFP flags to KASAN API
  mm, kasan: SLAB support
  kasan: modify kmalloc_large_oob_right(), add kmalloc_pagealloc_oob_right()
  ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
| -rw-r--r-- | fs/btrfs/tree-log.c | 102 | 
1 files changed, 90 insertions, 12 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 978c3a810893..24d03c751149 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -26,6 +26,7 @@  #include "print-tree.h"  #include "backref.h"  #include "hash.h" +#include "compression.h"  /* magic values for the inode_only field in btrfs_log_inode:   * @@ -1045,7 +1046,7 @@ again:  		/*  		 * NOTE: we have searched root tree and checked the -		 * coresponding ref, it does not need to check again. +		 * corresponding ref, it does not need to check again.  		 */  		*search_done = 1;  	} @@ -4500,7 +4501,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,  	mutex_lock(&BTRFS_I(inode)->log_mutex); -	btrfs_get_logged_extents(inode, &logged_list, start, end); +	/* +	 * Collect ordered extents only if we are logging data. This is to +	 * ensure a subsequent request to log this inode in LOG_INODE_ALL mode +	 * will process the ordered extents if they still exist at the time, +	 * because when we collect them we test and set for the flag +	 * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the +	 * same ordered extents. The consequence for the LOG_INODE_ALL log mode +	 * not processing the ordered extents is that we end up logging the +	 * corresponding file extent items, based on the extent maps in the +	 * inode's extent_map_tree's modified_list, without logging the +	 * respective checksums (since they may still be only attached to the +	 * ordered extents and have not been inserted in the csum tree by +	 * btrfs_finish_ordered_io() yet). +	 */ +	if (inode_only == LOG_INODE_ALL) +		btrfs_get_logged_extents(inode, &logged_list, start, end);  	/*  	 * a brute force approach to making sure we get the most uptodate @@ -4772,6 +4788,42 @@ out_unlock:  }  /* + * Check if we must fallback to a transaction commit when logging an inode. 
+ * This must be called after logging the inode and is used only in the context + * when fsyncing an inode requires the need to log some other inode - in which + * case we can't lock the i_mutex of each other inode we need to log as that + * can lead to deadlocks with concurrent fsync against other inodes (as we can + * log inodes up or down in the hierarchy) or rename operations for example. So + * we take the log_mutex of the inode after we have logged it and then check for + * its last_unlink_trans value - this is safe because any task setting + * last_unlink_trans must take the log_mutex and it must do this before it does + * the actual unlink operation, so if we do this check before a concurrent task + * sets last_unlink_trans it means we've logged a consistent version/state of + * all the inode items, otherwise we are not sure and must do a transaction + * commit (the concurrent task might have only updated last_unlink_trans before + * we logged the inode or it might have also done the unlink). + */ +static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, +					  struct inode *inode) +{ +	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; +	bool ret = false; + +	mutex_lock(&BTRFS_I(inode)->log_mutex); +	if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { +		/* +		 * Make sure any commits to the log are forced to be full +		 * commits. +		 */ +		btrfs_set_log_full_commit(fs_info, trans); +		ret = true; +	} +	mutex_unlock(&BTRFS_I(inode)->log_mutex); + +	return ret; +} + +/*   * follow the dentry parent pointers up the chain and see if any   * of the directories in it require a full commit before they can   * be logged.  
Returns zero if nothing special needs to be done or 1 if @@ -4784,7 +4836,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,  					       u64 last_committed)  {  	int ret = 0; -	struct btrfs_root *root;  	struct dentry *old_parent = NULL;  	struct inode *orig_inode = inode; @@ -4816,14 +4867,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,  			BTRFS_I(inode)->logged_trans = trans->transid;  		smp_mb(); -		if (BTRFS_I(inode)->last_unlink_trans > last_committed) { -			root = BTRFS_I(inode)->root; - -			/* -			 * make sure any commits to the log are forced -			 * to be full commits -			 */ -			btrfs_set_log_full_commit(root->fs_info, trans); +		if (btrfs_must_commit_transaction(trans, inode)) {  			ret = 1;  			break;  		} @@ -4982,6 +5026,9 @@ process_leaf:  			btrfs_release_path(path);  			ret = btrfs_log_inode(trans, root, di_inode,  					      log_mode, 0, LLONG_MAX, ctx); +			if (!ret && +			    btrfs_must_commit_transaction(trans, di_inode)) +				ret = 1;  			iput(di_inode);  			if (ret)  				goto next_dir_inode; @@ -5096,6 +5143,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,  			ret = btrfs_log_inode(trans, root, dir_inode,  					      LOG_INODE_ALL, 0, LLONG_MAX, ctx); +			if (!ret && +			    btrfs_must_commit_transaction(trans, dir_inode)) +				ret = 1;  			iput(dir_inode);  			if (ret)  				goto out; @@ -5447,6 +5497,9 @@ error:   * They revolve around files there were unlinked from the directory, and   * this function updates the parent directory so that a full commit is   * properly done if it is fsync'd later after the unlinks are done. + * + * Must be called before the unlink operations (updates to the subvolume tree, + * inodes, etc) are done.   
*/  void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,  			     struct inode *dir, struct inode *inode, @@ -5462,8 +5515,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,  	 * into the file.  When the file is logged we check it and  	 * don't log the parents if the file is fully on disk.  	 */ -	if (S_ISREG(inode->i_mode)) +	if (S_ISREG(inode->i_mode)) { +		mutex_lock(&BTRFS_I(inode)->log_mutex);  		BTRFS_I(inode)->last_unlink_trans = trans->transid; +		mutex_unlock(&BTRFS_I(inode)->log_mutex); +	}  	/*  	 * if this directory was already logged any new @@ -5494,7 +5550,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,  	return;  record: +	mutex_lock(&BTRFS_I(dir)->log_mutex); +	BTRFS_I(dir)->last_unlink_trans = trans->transid; +	mutex_unlock(&BTRFS_I(dir)->log_mutex); +} + +/* + * Make sure that if someone attempts to fsync the parent directory of a deleted + * snapshot, it ends up triggering a transaction commit. This is to guarantee + * that after replaying the log tree of the parent directory's root we will not + * see the snapshot anymore and at log replay time we will not see any log tree + * corresponding to the deleted snapshot's root, which could lead to replaying + * it after replaying the log tree of the parent directory (which would replay + * the snapshot delete operation). + * + * Must be called before the actual snapshot destroy operation (updates to the + * parent root and tree of tree roots trees, etc) are done. + */ +void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, +				   struct inode *dir) +{ +	mutex_lock(&BTRFS_I(dir)->log_mutex);  	BTRFS_I(dir)->last_unlink_trans = trans->transid; +	mutex_unlock(&BTRFS_I(dir)->log_mutex);  }  /*  | 
