diff options
Diffstat (limited to 'fs/btrfs/inode.c')
| -rw-r--r-- | fs/btrfs/inode.c | 955 | 
1 files changed, 416 insertions, 539 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0ef04d..ebf95f7a44d6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -37,6 +37,7 @@  #include <linux/posix_acl.h>  #include <linux/falloc.h>  #include <linux/slab.h> +#include <linux/ratelimit.h>  #include "compat.h"  #include "ctree.h"  #include "disk-io.h" @@ -51,6 +52,7 @@  #include "compression.h"  #include "locking.h"  #include "free-space-cache.h" +#include "inode-map.h"  struct btrfs_iget_args {  	u64 ino; @@ -136,9 +138,8 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,  		return -ENOMEM;  	path->leave_spinning = 1; -	btrfs_set_trans_block_group(trans, inode); -	key.objectid = inode->i_ino; +	key.objectid = btrfs_ino(inode);  	key.offset = start;  	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);  	datasize = btrfs_file_extent_calc_inline_size(cur_size); @@ -340,6 +341,10 @@ static noinline int compress_file_range(struct inode *inode,  	int will_compress;  	int compress_type = root->fs_info->compress_type; +	/* if this is a small write inside eof, kick off a defragbot */ +	if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) +		btrfs_add_inode_defrag(NULL, inode); +  	actual_end = min_t(u64, isize, end + 1);  again:  	will_compress = 0; @@ -420,9 +425,8 @@ again:  		}  	}  	if (start == 0) { -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans)); -		btrfs_set_trans_block_group(trans, inode);  		trans->block_rsv = &root->fs_info->delalloc_block_rsv;  		/* lets try to make an inline extent */ @@ -617,8 +621,9 @@ retry:  			    async_extent->start + async_extent->ram_size - 1,  			    GFP_NOFS); -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		BUG_ON(IS_ERR(trans)); +		trans->block_rsv = &root->fs_info->delalloc_block_rsv;  		ret = btrfs_reserve_extent(trans, root,  					   async_extent->compressed_size,  					   async_extent->compressed_size, @@ 
-649,7 +654,7 @@ retry:  					async_extent->start +  					async_extent->ram_size - 1, 0); -		em = alloc_extent_map(GFP_NOFS); +		em = alloc_extent_map();  		BUG_ON(!em);  		em->start = async_extent->start;  		em->len = async_extent->ram_size; @@ -745,6 +750,15 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,  	return alloc_hint;  } +static inline bool is_free_space_inode(struct btrfs_root *root, +				       struct inode *inode) +{ +	if (root == root->fs_info->tree_root || +	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) +		return true; +	return false; +} +  /*   * when extent_io.c finds a delayed allocation range in the file,   * the call backs end up in this code.  The basic idea is to @@ -777,10 +791,9 @@ static noinline int cow_file_range(struct inode *inode,  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;  	int ret = 0; -	BUG_ON(root == root->fs_info->tree_root); -	trans = btrfs_join_transaction(root, 1); +	BUG_ON(is_free_space_inode(root, inode)); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -788,6 +801,10 @@ static noinline int cow_file_range(struct inode *inode,  	disk_num_bytes = num_bytes;  	ret = 0; +	/* if this is a small write inside eof, kick off defrag */ +	if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) +		btrfs_add_inode_defrag(trans, inode); +  	if (start == 0) {  		/* lets try to make an inline extent */  		ret = cow_file_range_inline(trans, root, inode, @@ -826,7 +843,7 @@ static noinline int cow_file_range(struct inode *inode,  					   (u64)-1, &ins, 1);  		BUG_ON(ret); -		em = alloc_extent_map(GFP_NOFS); +		em = alloc_extent_map();  		BUG_ON(!em);  		em->start = start;  		em->orig_start = em->start; @@ -1008,7 +1025,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,  	
LIST_HEAD(list);  	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr, -				       bytenr + num_bytes - 1, &list); +				       bytenr + num_bytes - 1, &list, 0);  	if (ret == 0 && list_empty(&list))  		return 0; @@ -1049,29 +1066,33 @@ static noinline int run_delalloc_nocow(struct inode *inode,  	int type;  	int nocow;  	int check_prev = 1; -	bool nolock = false; +	bool nolock; +	u64 ino = btrfs_ino(inode);  	path = btrfs_alloc_path();  	BUG_ON(!path); -	if (root == root->fs_info->tree_root) { -		nolock = true; -		trans = btrfs_join_transaction_nolock(root, 1); -	} else { -		trans = btrfs_join_transaction(root, 1); -	} + +	nolock = is_free_space_inode(root, inode); + +	if (nolock) +		trans = btrfs_join_transaction_nolock(root); +	else +		trans = btrfs_join_transaction(root); +  	BUG_ON(IS_ERR(trans)); +	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	cow_start = (u64)-1;  	cur_offset = start;  	while (1) { -		ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, +		ret = btrfs_lookup_file_extent(trans, root, path, ino,  					       cur_offset, 0);  		BUG_ON(ret < 0);  		if (ret > 0 && path->slots[0] > 0 && check_prev) {  			leaf = path->nodes[0];  			btrfs_item_key_to_cpu(leaf, &found_key,  					      path->slots[0] - 1); -			if (found_key.objectid == inode->i_ino && +			if (found_key.objectid == ino &&  			    found_key.type == BTRFS_EXTENT_DATA_KEY)  				path->slots[0]--;  		} @@ -1092,7 +1113,7 @@ next_slot:  		num_bytes = 0;  		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); -		if (found_key.objectid > inode->i_ino || +		if (found_key.objectid > ino ||  		    found_key.type > BTRFS_EXTENT_DATA_KEY ||  		    found_key.offset > end)  			break; @@ -1127,7 +1148,7 @@ next_slot:  				goto out_check;  			if (btrfs_extent_readonly(root, disk_bytenr))  				goto out_check; -			if (btrfs_cross_ref_exist(trans, root, inode->i_ino, +			if (btrfs_cross_ref_exist(trans, root, ino,  						  found_key.offset -  						  extent_offset, 
disk_bytenr))  				goto out_check; @@ -1164,7 +1185,7 @@ out_check:  			goto next_slot;  		} -		btrfs_release_path(root, path); +		btrfs_release_path(path);  		if (cow_start != (u64)-1) {  			ret = cow_file_range(inode, locked_page, cow_start,  					found_key.offset - 1, page_started, @@ -1177,7 +1198,7 @@ out_check:  			struct extent_map *em;  			struct extent_map_tree *em_tree;  			em_tree = &BTRFS_I(inode)->extent_tree; -			em = alloc_extent_map(GFP_NOFS); +			em = alloc_extent_map();  			BUG_ON(!em);  			em->start = cur_offset;  			em->orig_start = em->start; @@ -1222,7 +1243,7 @@ out_check:  		if (cur_offset > end)  			break;  	} -	btrfs_release_path(root, path); +	btrfs_release_path(path);  	if (cur_offset <= end && cow_start == (u64)-1)  		cow_start = cur_offset; @@ -1310,14 +1331,13 @@ static int btrfs_set_bit_hook(struct inode *inode,  	/*  	 * set_bit and clear bit hooks normally require _irqsave/restore -	 * but in this case, we are only testeing for the DELALLOC +	 * but in this case, we are only testing for the DELALLOC  	 * bit, which is only set or cleared with irqs on  	 */  	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {  		struct btrfs_root *root = BTRFS_I(inode)->root;  		u64 len = state->end + 1 - state->start; -		int do_list = (root->root_key.objectid != -			       BTRFS_ROOT_TREE_OBJECTID); +		bool do_list = !is_free_space_inode(root, inode);  		if (*bits & EXTENT_FIRST_DELALLOC)  			*bits &= ~EXTENT_FIRST_DELALLOC; @@ -1344,14 +1364,13 @@ static int btrfs_clear_bit_hook(struct inode *inode,  {  	/*  	 * set_bit and clear bit hooks normally require _irqsave/restore -	 * but in this case, we are only testeing for the DELALLOC +	 * but in this case, we are only testing for the DELALLOC  	 * bit, which is only set or cleared with irqs on  	 */  	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {  		struct btrfs_root *root = BTRFS_I(inode)->root;  		u64 len = state->end + 1 - state->start; -		int do_list = 
(root->root_key.objectid != -			       BTRFS_ROOT_TREE_OBJECTID); +		bool do_list = !is_free_space_inode(root, inode);  		if (*bits & EXTENT_FIRST_DELALLOC)  			*bits &= ~EXTENT_FIRST_DELALLOC; @@ -1458,7 +1477,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,  	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; -	if (root == root->fs_info->tree_root) +	if (is_free_space_inode(root, inode))  		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);  	else  		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1500,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,  {  	struct btrfs_ordered_sum *sum; -	btrfs_set_trans_block_group(trans, inode); -  	list_for_each_entry(sum, list, list) {  		btrfs_csum_file_blocks(trans,  		       BTRFS_I(inode)->root->fs_info->csum_root, sum); @@ -1644,7 +1661,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  				 &hint, 0);  	BUG_ON(ret); -	ins.objectid = inode->i_ino; +	ins.objectid = btrfs_ino(inode);  	ins.offset = file_pos;  	ins.type = BTRFS_EXTENT_DATA_KEY;  	ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); @@ -1675,7 +1692,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  	ins.type = BTRFS_EXTENT_ITEM_KEY;  	ret = btrfs_alloc_reserved_file_extent(trans, root,  					root->root_key.objectid, -					inode->i_ino, file_pos, &ins); +					btrfs_ino(inode), file_pos, &ins);  	BUG_ON(ret);  	btrfs_free_path(path); @@ -1701,7 +1718,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  	struct extent_state *cached_state = NULL;  	int compress_type = 0;  	int ret; -	bool nolock = false; +	bool nolock;  	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,  					     end - start + 1); @@ -1709,18 +1726,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  		return 0;  	BUG_ON(!ordered_extent); -	nolock = (root == 
root->fs_info->tree_root); +	nolock = is_free_space_inode(root, inode);  	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {  		BUG_ON(!list_empty(&ordered_extent->list));  		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);  		if (!ret) {  			if (nolock) -				trans = btrfs_join_transaction_nolock(root, 1); +				trans = btrfs_join_transaction_nolock(root);  			else -				trans = btrfs_join_transaction(root, 1); +				trans = btrfs_join_transaction(root);  			BUG_ON(IS_ERR(trans)); -			btrfs_set_trans_block_group(trans, inode);  			trans->block_rsv = &root->fs_info->delalloc_block_rsv;  			ret = btrfs_update_inode(trans, root, inode);  			BUG_ON(ret); @@ -1733,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  			 0, &cached_state, GFP_NOFS);  	if (nolock) -		trans = btrfs_join_transaction_nolock(root, 1); +		trans = btrfs_join_transaction_nolock(root);  	else -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -1855,7 +1870,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,  		}  		read_unlock(&em_tree->lock); -		if (!em || IS_ERR(em)) { +		if (IS_ERR_OR_NULL(em)) {  			kfree(failrec);  			return -EIO;  		} @@ -2004,12 +2019,11 @@ good:  	return 0;  zeroit: -	if (printk_ratelimit()) { -		printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " -		       "private %llu\n", page->mapping->host->i_ino, +	printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u " +		       "private %llu\n", +		       (unsigned long long)btrfs_ino(page->mapping->host),  		       (unsigned long long)start, csum,  		       (unsigned long long)private); -	}  	memset(kaddr + offset, 1, end - start + 1);  	flush_dcache_page(page);  	kunmap_atomic(kaddr, KM_USER0); @@ -2244,7 
+2258,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)  	/* insert an orphan item to track this unlinked/truncated file */  	if (insert >= 1) { -		ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); +		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));  		BUG_ON(ret);  	} @@ -2281,7 +2295,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)  	spin_unlock(&root->orphan_lock);  	if (trans && delete_item) { -		ret = btrfs_del_orphan_item(trans, root, inode->i_ino); +		ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));  		BUG_ON(ret);  	} @@ -2346,7 +2360,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)  			break;  		/* release the path since we're done with it */ -		btrfs_release_path(root, path); +		btrfs_release_path(path);  		/*  		 * this is where we are basically btrfs_lookup, without the @@ -2413,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)  					(u64)-1);  	if (root->orphan_block_rsv || root->orphan_item_inserted) { -		trans = btrfs_join_transaction(root, 1); +		trans = btrfs_join_transaction(root);  		if (!IS_ERR(trans))  			btrfs_end_transaction(trans, root);  	} @@ -2493,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_key location;  	int maybe_acls; -	u64 alloc_group_block;  	u32 rdev;  	int ret;  	path = btrfs_alloc_path();  	BUG_ON(!path); +	path->leave_spinning = 1;  	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));  	ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -2508,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)  	leaf = path->nodes[0];  	inode_item = btrfs_item_ptr(leaf, path->slots[0],  				    struct btrfs_inode_item); +	if (!leaf->map_token) +		map_private_extent_buffer(leaf, (unsigned long)inode_item, +					  sizeof(struct btrfs_inode_item), +					  &leaf->map_token, &leaf->kaddr, +					  
&leaf->map_start, &leaf->map_len, +					  KM_USER1);  	inode->i_mode = btrfs_inode_mode(leaf, inode_item);  	inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); @@ -2537,18 +2557,20 @@ static void btrfs_read_locked_inode(struct inode *inode)  	BTRFS_I(inode)->index_cnt = (u64)-1;  	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); -	alloc_group_block = btrfs_inode_block_group(leaf, inode_item); -  	/*  	 * try to precache a NULL acl entry for files that don't have  	 * any xattrs or acls  	 */ -	maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); +	maybe_acls = acls_after_inode_item(leaf, path->slots[0], +					   btrfs_ino(inode));  	if (!maybe_acls)  		cache_no_acl(inode); -	BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, -						alloc_group_block, 0); +	if (leaf->map_token) { +		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); +		leaf->map_token = NULL; +	} +  	btrfs_free_path(path);  	inode_item = NULL; @@ -2628,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,  	btrfs_set_inode_transid(leaf, item, trans->transid);  	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);  	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); -	btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); +	btrfs_set_inode_block_group(leaf, item, 0);  	if (leaf->map_token) {  		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -2647,11 +2669,26 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,  	struct extent_buffer *leaf;  	int ret; +	/* +	 * If root is tree root, it means this inode is used to +	 * store free space information. And these inodes are updated +	 * when committing the transaction, so their update must not be +	 * delayed, or a deadlock will occur. 
+	 */ +	if (!is_free_space_inode(root, inode)) { +		ret = btrfs_delayed_update_inode(trans, root, inode); +		if (!ret) +			btrfs_set_inode_last_trans(trans, inode); +		return ret; +	} +  	path = btrfs_alloc_path(); -	BUG_ON(!path); +	if (!path) +		return -ENOMEM; +  	path->leave_spinning = 1; -	ret = btrfs_lookup_inode(trans, root, path, -				 &BTRFS_I(inode)->location, 1); +	ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, +				 1);  	if (ret) {  		if (ret > 0)  			ret = -ENOENT; @@ -2661,7 +2698,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,  	btrfs_unlock_up_safe(path, 1);  	leaf = path->nodes[0];  	inode_item = btrfs_item_ptr(leaf, path->slots[0], -				  struct btrfs_inode_item); +				    struct btrfs_inode_item);  	fill_inode_item(trans, leaf, inode_item, inode);  	btrfs_mark_buffer_dirty(leaf); @@ -2672,7 +2709,6 @@ failed:  	return ret;  } -  /*   * unlink helper that gets used here in inode.c and in the tree logging   * recovery code.  
It remove a link in a directory with a given name, and @@ -2689,6 +2725,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,  	struct btrfs_dir_item *di;  	struct btrfs_key key;  	u64 index; +	u64 ino = btrfs_ino(inode); +	u64 dir_ino = btrfs_ino(dir);  	path = btrfs_alloc_path();  	if (!path) { @@ -2697,7 +2735,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,  	}  	path->leave_spinning = 1; -	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, +	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,  				    name, name_len, -1);  	if (IS_ERR(di)) {  		ret = PTR_ERR(di); @@ -2712,33 +2750,23 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,  	ret = btrfs_delete_one_dir_name(trans, root, path, di);  	if (ret)  		goto err; -	btrfs_release_path(root, path); +	btrfs_release_path(path); -	ret = btrfs_del_inode_ref(trans, root, name, name_len, -				  inode->i_ino, -				  dir->i_ino, &index); +	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, +				  dir_ino, &index);  	if (ret) {  		printk(KERN_INFO "btrfs failed to delete reference to %.*s, " -		       "inode %lu parent %lu\n", name_len, name, -		       inode->i_ino, dir->i_ino); +		       "inode %llu parent %llu\n", name_len, name, +		       (unsigned long long)ino, (unsigned long long)dir_ino);  		goto err;  	} -	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, -					 index, name, name_len, -1); -	if (IS_ERR(di)) { -		ret = PTR_ERR(di); -		goto err; -	} -	if (!di) { -		ret = -ENOENT; +	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); +	if (ret)  		goto err; -	} -	ret = btrfs_delete_one_dir_name(trans, root, path, di); -	btrfs_release_path(root, path);  	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, -					 inode, dir->i_ino); +					 inode, dir_ino);  	BUG_ON(ret != 0 && ret != -ENOENT);  	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, @@ -2816,12 +2844,14 @@ static struct 
btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  	int check_link = 1;  	int err = -ENOSPC;  	int ret; +	u64 ino = btrfs_ino(inode); +	u64 dir_ino = btrfs_ino(dir);  	trans = btrfs_start_transaction(root, 10);  	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)  		return trans; -	if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) +	if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)  		return ERR_PTR(-ENOSPC);  	/* check if there is someone else holds reference */ @@ -2862,7 +2892,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  	} else {  		check_link = 0;  	} -	btrfs_release_path(root, path); +	btrfs_release_path(path);  	ret = btrfs_lookup_inode(trans, root, path,  				&BTRFS_I(inode)->location, 0); @@ -2876,11 +2906,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  	} else {  		check_link = 0;  	} -	btrfs_release_path(root, path); +	btrfs_release_path(path);  	if (ret == 0 && S_ISREG(inode->i_mode)) {  		ret = btrfs_lookup_file_extent(trans, root, path, -					       inode->i_ino, (u64)-1, 0); +					       ino, (u64)-1, 0);  		if (ret < 0) {  			err = ret;  			goto out; @@ -2888,7 +2918,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  		BUG_ON(ret == 0);  		if (check_path_shared(root, path))  			goto out; -		btrfs_release_path(root, path); +		btrfs_release_path(path);  	}  	if (!check_link) { @@ -2896,7 +2926,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  		goto out;  	} -	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, +	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,  				dentry->d_name.name, dentry->d_name.len, 0);  	if (IS_ERR(di)) {  		err = PTR_ERR(di); @@ -2909,11 +2939,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  		err = 0;  		goto out;  	} -	btrfs_release_path(root, path); +	btrfs_release_path(path);  	ref = btrfs_lookup_inode_ref(trans, root, path,  				
dentry->d_name.name, dentry->d_name.len, -				inode->i_ino, dir->i_ino, 0); +				ino, dir_ino, 0);  	if (IS_ERR(ref)) {  		err = PTR_ERR(ref);  		goto out; @@ -2922,9 +2952,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,  	if (check_path_shared(root, path))  		goto out;  	index = btrfs_inode_ref_index(path->nodes[0], ref); -	btrfs_release_path(root, path); +	btrfs_release_path(path); -	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, +	/* +	 * This is a commit root search, if we can lookup inode item and other +	 * relative items in the commit root, it means the transaction of +	 * dir/file creation has been committed, and the dir index item that we +	 * delay to insert has also been inserted into the commit root. So +	 * we needn't worry about the delayed insertion of the dir index item +	 * here. +	 */ +	di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,  				dentry->d_name.name, dentry->d_name.len, 0);  	if (IS_ERR(di)) {  		err = PTR_ERR(di); @@ -2969,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); -  	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);  	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, @@ -2999,54 +3035,47 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,  	struct btrfs_key key;  	u64 index;  	int ret; +	u64 dir_ino = btrfs_ino(dir);  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; -	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, +	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,  				   name, name_len, -1); -	BUG_ON(!di || IS_ERR(di)); +	BUG_ON(IS_ERR_OR_NULL(di));  	leaf = path->nodes[0];  	btrfs_dir_item_key_to_cpu(leaf, di, &key);  	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);  	ret = btrfs_delete_one_dir_name(trans, root, path, di);  	BUG_ON(ret); -	
btrfs_release_path(root, path); +	btrfs_release_path(path);  	ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,  				 objectid, root->root_key.objectid, -				 dir->i_ino, &index, name, name_len); +				 dir_ino, &index, name, name_len);  	if (ret < 0) {  		BUG_ON(ret != -ENOENT); -		di = btrfs_search_dir_index_item(root, path, dir->i_ino, +		di = btrfs_search_dir_index_item(root, path, dir_ino,  						 name, name_len); -		BUG_ON(!di || IS_ERR(di)); +		BUG_ON(IS_ERR_OR_NULL(di));  		leaf = path->nodes[0];  		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -		btrfs_release_path(root, path); +		btrfs_release_path(path);  		index = key.offset;  	} +	btrfs_release_path(path); -	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, -					 index, name, name_len, -1); -	BUG_ON(!di || IS_ERR(di)); - -	leaf = path->nodes[0]; -	btrfs_dir_item_key_to_cpu(leaf, di, &key); -	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); -	ret = btrfs_delete_one_dir_name(trans, root, path, di); +	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);  	BUG_ON(ret); -	btrfs_release_path(root, path);  	btrfs_i_size_write(dir, dir->i_size - name_len * 2);  	dir->i_mtime = dir->i_ctime = CURRENT_TIME;  	ret = btrfs_update_inode(trans, root, dir);  	BUG_ON(ret); -	btrfs_free_path(path);  	return 0;  } @@ -3059,16 +3088,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)  	unsigned long nr = 0;  	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || -	    inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) +	    btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)  		return -ENOTEMPTY;  	trans = __unlink_start_trans(dir, dentry);  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); - -	if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { +	if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {  		err = btrfs_unlink_subvol(trans, root, dir,  					  BTRFS_I(inode)->location.objectid,  					  
dentry->d_name.name, @@ -3093,178 +3120,6 @@ out:  	return err;  } -#if 0 -/* - * when truncating bytes in a file, it is possible to avoid reading - * the leaves that contain only checksum items.  This can be the - * majority of the IO required to delete a large file, but it must - * be done carefully. - * - * The keys in the level just above the leaves are checked to make sure - * the lowest key in a given leaf is a csum key, and starts at an offset - * after the new  size. - * - * Then the key for the next leaf is checked to make sure it also has - * a checksum item for the same file.  If it does, we know our target leaf - * contains only checksum items, and it can be safely freed without reading - * it. - * - * This is just an optimization targeted at large files.  It may do - * nothing.  It will return 0 unless things went badly. - */ -static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans, -				     struct btrfs_root *root, -				     struct btrfs_path *path, -				     struct inode *inode, u64 new_size) -{ -	struct btrfs_key key; -	int ret; -	int nritems; -	struct btrfs_key found_key; -	struct btrfs_key other_key; -	struct btrfs_leaf_ref *ref; -	u64 leaf_gen; -	u64 leaf_start; - -	path->lowest_level = 1; -	key.objectid = inode->i_ino; -	key.type = BTRFS_CSUM_ITEM_KEY; -	key.offset = new_size; -again: -	ret = btrfs_search_slot(trans, root, &key, path, -1, 1); -	if (ret < 0) -		goto out; - -	if (path->nodes[1] == NULL) { -		ret = 0; -		goto out; -	} -	ret = 0; -	btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]); -	nritems = btrfs_header_nritems(path->nodes[1]); - -	if (!nritems) -		goto out; - -	if (path->slots[1] >= nritems) -		goto next_node; - -	/* did we find a key greater than anything we want to delete? 
*/ -	if (found_key.objectid > inode->i_ino || -	   (found_key.objectid == inode->i_ino && found_key.type > key.type)) -		goto out; - -	/* we check the next key in the node to make sure the leave contains -	 * only checksum items.  This comparison doesn't work if our -	 * leaf is the last one in the node -	 */ -	if (path->slots[1] + 1 >= nritems) { -next_node: -		/* search forward from the last key in the node, this -		 * will bring us into the next node in the tree -		 */ -		btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1); - -		/* unlikely, but we inc below, so check to be safe */ -		if (found_key.offset == (u64)-1) -			goto out; - -		/* search_forward needs a path with locks held, do the -		 * search again for the original key.  It is possible -		 * this will race with a balance and return a path that -		 * we could modify, but this drop is just an optimization -		 * and is allowed to miss some leaves. -		 */ -		btrfs_release_path(root, path); -		found_key.offset++; - -		/* setup a max key for search_forward */ -		other_key.offset = (u64)-1; -		other_key.type = key.type; -		other_key.objectid = key.objectid; - -		path->keep_locks = 1; -		ret = btrfs_search_forward(root, &found_key, &other_key, -					   path, 0, 0); -		path->keep_locks = 0; -		if (ret || found_key.objectid != key.objectid || -		    found_key.type != key.type) { -			ret = 0; -			goto out; -		} - -		key.offset = found_key.offset; -		btrfs_release_path(root, path); -		cond_resched(); -		goto again; -	} - -	/* we know there's one more slot after us in the tree, -	 * read that key so we can verify it is also a checksum item -	 */ -	btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1); - -	if (found_key.objectid < inode->i_ino) -		goto next_key; - -	if (found_key.type != key.type || found_key.offset < new_size) -		goto next_key; - -	/* -	 * if the key for the next leaf isn't a csum key from this objectid, -	 * we can't be sure there aren't good items inside this leaf. 
-	 * Bail out -	 */ -	if (other_key.objectid != inode->i_ino || other_key.type != key.type) -		goto out; - -	leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]); -	leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]); -	/* -	 * it is safe to delete this leaf, it contains only -	 * csum items from this inode at an offset >= new_size -	 */ -	ret = btrfs_del_leaf(trans, root, path, leaf_start); -	BUG_ON(ret); - -	if (root->ref_cows && leaf_gen < trans->transid) { -		ref = btrfs_alloc_leaf_ref(root, 0); -		if (ref) { -			ref->root_gen = root->root_key.offset; -			ref->bytenr = leaf_start; -			ref->owner = 0; -			ref->generation = leaf_gen; -			ref->nritems = 0; - -			btrfs_sort_leaf_ref(ref); - -			ret = btrfs_add_leaf_ref(root, ref, 0); -			WARN_ON(ret); -			btrfs_free_leaf_ref(root, ref); -		} else { -			WARN_ON(1); -		} -	} -next_key: -	btrfs_release_path(root, path); - -	if (other_key.objectid == inode->i_ino && -	    other_key.type == key.type && other_key.offset > key.offset) { -		key.offset = other_key.offset; -		cond_resched(); -		goto again; -	} -	ret = 0; -out: -	/* fixup any changes we've made to the path */ -	path->lowest_level = 0; -	path->keep_locks = 0; -	btrfs_release_path(root, path); -	return ret; -} - -#endif -  /*   * this can truncate away extent items, csum items and directory items.   * It starts at a high offset and removes keys until it can't find @@ -3300,17 +3155,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,  	int encoding;  	int ret;  	int err = 0; +	u64 ino = btrfs_ino(inode);  	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);  	if (root->ref_cows || root == root->fs_info->tree_root)  		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); +	/* +	 * This function is also used to drop the items in the log tree before +	 * we relog the inode, so if root != BTRFS_I(inode)->root, it means +	 * it is used to drop the logged items.
So we shouldn't kill the delayed +	 * items. +	 */ +	if (min_type == 0 && root == BTRFS_I(inode)->root) +		btrfs_kill_delayed_inode_items(inode); +  	path = btrfs_alloc_path();  	BUG_ON(!path);  	path->reada = -1; -	key.objectid = inode->i_ino; +	key.objectid = ino;  	key.offset = (u64)-1;  	key.type = (u8)-1; @@ -3338,7 +3203,7 @@ search_again:  		found_type = btrfs_key_type(&found_key);  		encoding = 0; -		if (found_key.objectid != inode->i_ino) +		if (found_key.objectid != ino)  			break;  		if (found_type < min_type) @@ -3428,7 +3293,6 @@ search_again:  				    btrfs_file_extent_calc_inline_size(size);  				ret = btrfs_truncate_item(trans, root, path,  							  size, 1); -				BUG_ON(ret);  			} else if (root->ref_cows) {  				inode_sub_bytes(inode, item_end + 1 -  						found_key.offset); @@ -3457,7 +3321,7 @@ delete:  			ret = btrfs_free_extent(trans, root, extent_start,  						extent_num_bytes, 0,  						btrfs_header_owner(leaf), -						inode->i_ino, extent_offset); +						ino, extent_offset);  			BUG_ON(ret);  		} @@ -3466,7 +3330,9 @@ delete:  		if (path->slots[0] == 0 ||  		    path->slots[0] != pending_del_slot) { -			if (root->ref_cows) { +			if (root->ref_cows && +			    BTRFS_I(inode)->location.objectid != +						BTRFS_FREE_INO_OBJECTID) {  				err = -EAGAIN;  				goto out;  			} @@ -3477,7 +3343,7 @@ delete:  				BUG_ON(ret);  				pending_del_nr = 0;  			} -			btrfs_release_path(root, path); +			btrfs_release_path(path);  			goto search_again;  		} else {  			path->slots[0]--; @@ -3635,7 +3501,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  	while (1) {  		em = btrfs_get_extent(inode, NULL, 0, cur_offset,  				block_end - cur_offset, 0); -		BUG_ON(IS_ERR(em) || !em); +		BUG_ON(IS_ERR_OR_NULL(em));  		last_byte = min(extent_map_end(em), block_end);  		last_byte = (last_byte + mask) & ~mask;  		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { @@ -3647,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, 
loff_t size)  				err = PTR_ERR(trans);  				break;  			} -			btrfs_set_trans_block_group(trans, inode);  			err = btrfs_drop_extents(trans, inode, cur_offset,  						 cur_offset + hole_size, @@ -3656,7 +3521,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  				break;  			err = btrfs_insert_file_extent(trans, root, -					inode->i_ino, cur_offset, 0, +					btrfs_ino(inode), cur_offset, 0,  					0, hole_size, 0, hole_size,  					0, 0, 0);  			if (err) @@ -3758,7 +3623,7 @@ void btrfs_evict_inode(struct inode *inode)  	truncate_inode_pages(&inode->i_data, 0);  	if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || -			       root == root->fs_info->tree_root)) +			       is_free_space_inode(root, inode)))  		goto no_delete;  	if (is_bad_inode(inode)) { @@ -3783,7 +3648,6 @@ void btrfs_evict_inode(struct inode *inode)  	while (1) {  		trans = btrfs_start_transaction(root, 0);  		BUG_ON(IS_ERR(trans)); -		btrfs_set_trans_block_group(trans, inode);  		trans->block_rsv = root->orphan_block_rsv;  		ret = btrfs_block_rsv_check(trans, root, @@ -3811,6 +3675,10 @@ void btrfs_evict_inode(struct inode *inode)  		BUG_ON(ret);  	} +	if (!(root == root->fs_info->tree_root || +	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) +		btrfs_return_ino(root, btrfs_ino(inode)); +  	nr = trans->blocks_used;  	btrfs_end_transaction(trans, root);  	btrfs_btree_balance_dirty(root, nr); @@ -3836,12 +3704,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,  	path = btrfs_alloc_path();  	BUG_ON(!path); -	di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, +	di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,  				    namelen, 0);  	if (IS_ERR(di))  		ret = PTR_ERR(di); -	if (!di || IS_ERR(di)) +	if (IS_ERR_OR_NULL(di))  		goto out_err;  	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); @@ -3889,7 +3757,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,  	leaf = 
path->nodes[0];  	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); -	if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || +	if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||  	    btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)  		goto out; @@ -3899,7 +3767,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,  	if (ret)  		goto out; -	btrfs_release_path(root->fs_info->tree_root, path); +	btrfs_release_path(path);  	new_root = btrfs_read_fs_root_no_name(root->fs_info, location);  	if (IS_ERR(new_root)) { @@ -3928,6 +3796,7 @@ static void inode_tree_add(struct inode *inode)  	struct btrfs_inode *entry;  	struct rb_node **p;  	struct rb_node *parent; +	u64 ino = btrfs_ino(inode);  again:  	p = &root->inode_tree.rb_node;  	parent = NULL; @@ -3940,9 +3809,9 @@ again:  		parent = *p;  		entry = rb_entry(parent, struct btrfs_inode, rb_node); -		if (inode->i_ino < entry->vfs_inode.i_ino) +		if (ino < btrfs_ino(&entry->vfs_inode))  			p = &parent->rb_left; -		else if (inode->i_ino > entry->vfs_inode.i_ino) +		else if (ino > btrfs_ino(&entry->vfs_inode))  			p = &parent->rb_right;  		else {  			WARN_ON(!(entry->vfs_inode.i_state & @@ -4006,9 +3875,9 @@ again:  		prev = node;  		entry = rb_entry(node, struct btrfs_inode, rb_node); -		if (objectid < entry->vfs_inode.i_ino) +		if (objectid < btrfs_ino(&entry->vfs_inode))  			node = node->rb_left; -		else if (objectid > entry->vfs_inode.i_ino) +		else if (objectid > btrfs_ino(&entry->vfs_inode))  			node = node->rb_right;  		else  			break; @@ -4016,7 +3885,7 @@ again:  	if (!node) {  		while (prev) {  			entry = rb_entry(prev, struct btrfs_inode, rb_node); -			if (objectid <= entry->vfs_inode.i_ino) { +			if (objectid <= btrfs_ino(&entry->vfs_inode)) {  				node = prev;  				break;  			} @@ -4025,7 +3894,7 @@ again:  	}  	while (node) {  		entry = rb_entry(node, struct btrfs_inode, rb_node); -		objectid = entry->vfs_inode.i_ino + 1; +		objectid = btrfs_ino(&entry->vfs_inode) + 1;  
		inode = igrab(&entry->vfs_inode);  		if (inode) {  			spin_unlock(&root->inode_lock); @@ -4063,7 +3932,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)  static int btrfs_find_actor(struct inode *inode, void *opaque)  {  	struct btrfs_iget_args *args = opaque; -	return args->ino == inode->i_ino && +	return args->ino == btrfs_ino(inode) &&  		args->root == BTRFS_I(inode)->root;  } @@ -4208,7 +4077,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,  	return d_splice_alias(inode, dentry);  } -static unsigned char btrfs_filetype_table[] = { +unsigned char btrfs_filetype_table[] = {  	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK  }; @@ -4222,6 +4091,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  	struct btrfs_key key;  	struct btrfs_key found_key;  	struct btrfs_path *path; +	struct list_head ins_list; +	struct list_head del_list;  	int ret;  	struct extent_buffer *leaf;  	int slot; @@ -4234,6 +4105,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  	char tmp_name[32];  	char *name_ptr;  	int name_len; +	int is_curr = 0;	/* filp->f_pos points to the current index? */  	/* FIXME, use a real flag for deciding about the key type */  	if (root->fs_info->tree_root == root) @@ -4241,9 +4113,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  	/* special case for "." 
*/  	if (filp->f_pos == 0) { -		over = filldir(dirent, ".", 1, -			       1, inode->i_ino, -			       DT_DIR); +		over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);  		if (over)  			return 0;  		filp->f_pos = 1; @@ -4258,11 +4128,20 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  		filp->f_pos = 2;  	}  	path = btrfs_alloc_path(); -	path->reada = 2; +	if (!path) +		return -ENOMEM; + +	path->reada = 1; + +	if (key_type == BTRFS_DIR_INDEX_KEY) { +		INIT_LIST_HEAD(&ins_list); +		INIT_LIST_HEAD(&del_list); +		btrfs_get_delayed_items(inode, &ins_list, &del_list); +	}  	btrfs_set_key_type(&key, key_type);  	key.offset = filp->f_pos; -	key.objectid = inode->i_ino; +	key.objectid = btrfs_ino(inode);  	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);  	if (ret < 0) @@ -4289,8 +4168,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,  			break;  		if (found_key.offset < filp->f_pos)  			goto next; +		if (key_type == BTRFS_DIR_INDEX_KEY && +		    btrfs_should_delete_dir_index(&del_list, +						  found_key.offset)) +			goto next;  		filp->f_pos = found_key.offset; +		is_curr = 1;  		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);  		di_cur = 0; @@ -4345,6 +4229,15 @@ next:  		path->slots[0]++;  	} +	if (key_type == BTRFS_DIR_INDEX_KEY) { +		if (is_curr) +			filp->f_pos++; +		ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, +						      &ins_list); +		if (ret) +			goto nopos; +	} +  	/* Reached end of directory/root. Bump pos past the last item. 
*/  	if (key_type == BTRFS_DIR_INDEX_KEY)  		/* @@ -4357,6 +4250,8 @@ next:  nopos:  	ret = 0;  err: +	if (key_type == BTRFS_DIR_INDEX_KEY) +		btrfs_put_delayed_items(&ins_list, &del_list);  	btrfs_free_path(path);  	return ret;  } @@ -4371,17 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)  	if (BTRFS_I(inode)->dummy_inode)  		return 0; -	smp_mb(); -	nolock = (root->fs_info->closing && root == root->fs_info->tree_root); +	if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) +		nolock = true;  	if (wbc->sync_mode == WB_SYNC_ALL) {  		if (nolock) -			trans = btrfs_join_transaction_nolock(root, 1); +			trans = btrfs_join_transaction_nolock(root);  		else -			trans = btrfs_join_transaction(root, 1); +			trans = btrfs_join_transaction(root);  		if (IS_ERR(trans))  			return PTR_ERR(trans); -		btrfs_set_trans_block_group(trans, inode);  		if (nolock)  			ret = btrfs_end_transaction_nolock(trans, root);  		else @@ -4396,7 +4290,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)   * FIXME, needs more benchmarking...there are no reasons other than performance   * to keep or drop this code.   
*/ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags)  {  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_trans_handle *trans; @@ -4405,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode)  	if (BTRFS_I(inode)->dummy_inode)  		return; -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	BUG_ON(IS_ERR(trans)); -	btrfs_set_trans_block_group(trans, inode);  	ret = btrfs_update_inode(trans, root, inode);  	if (ret && ret == -ENOSPC) { @@ -4415,25 +4308,24 @@ void btrfs_dirty_inode(struct inode *inode)  		btrfs_end_transaction(trans, root);  		trans = btrfs_start_transaction(root, 1);  		if (IS_ERR(trans)) { -			if (printk_ratelimit()) { -				printk(KERN_ERR "btrfs: fail to " -				       "dirty  inode %lu error %ld\n", -				       inode->i_ino, PTR_ERR(trans)); -			} +			printk_ratelimited(KERN_ERR "btrfs: fail to " +				       "dirty  inode %llu error %ld\n", +				       (unsigned long long)btrfs_ino(inode), +				       PTR_ERR(trans));  			return;  		} -		btrfs_set_trans_block_group(trans, inode);  		ret = btrfs_update_inode(trans, root, inode);  		if (ret) { -			if (printk_ratelimit()) { -				printk(KERN_ERR "btrfs: fail to " -				       "dirty  inode %lu error %d\n", -				       inode->i_ino, ret); -			} +			printk_ratelimited(KERN_ERR "btrfs: fail to " +				       "dirty  inode %llu error %d\n", +				       (unsigned long long)btrfs_ino(inode), +				       ret);  		}  	}  	btrfs_end_transaction(trans, root); +	if (BTRFS_I(inode)->delayed_node) +		btrfs_balance_delayed_items(root);  }  /* @@ -4449,7 +4341,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)  	struct extent_buffer *leaf;  	int ret; -	key.objectid = inode->i_ino; +	key.objectid = btrfs_ino(inode);  	btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);  	key.offset = (u64)-1; @@ -4481,7 +4373,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)  	leaf = path->nodes[0];  	
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); -	if (found_key.objectid != inode->i_ino || +	if (found_key.objectid != btrfs_ino(inode) ||  	    btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {  		BTRFS_I(inode)->index_cnt = 2;  		goto out; @@ -4502,9 +4394,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)  	int ret = 0;  	if (BTRFS_I(dir)->index_cnt == (u64)-1) { -		ret = btrfs_set_inode_index_count(dir); -		if (ret) -			return ret; +		ret = btrfs_inode_delayed_dir_index_count(dir); +		if (ret) { +			ret = btrfs_set_inode_index_count(dir); +			if (ret) +				return ret; +		}  	}  	*index = BTRFS_I(dir)->index_cnt; @@ -4517,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  				     struct btrfs_root *root,  				     struct inode *dir,  				     const char *name, int name_len, -				     u64 ref_objectid, u64 objectid, -				     u64 alloc_hint, int mode, u64 *index) +				     u64 ref_objectid, u64 objectid, int mode, +				     u64 *index)  {  	struct inode *inode;  	struct btrfs_inode_item *inode_item; @@ -4540,6 +4435,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  		return ERR_PTR(-ENOMEM);  	} +	/* +	 * we have to initialize this early, so we can reclaim the inode +	 * number if we fail afterwards in this function. 
+	 */ +	inode->i_ino = objectid; +  	if (dir) {  		trace_btrfs_inode_request(dir); @@ -4565,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  		owner = 0;  	else  		owner = 1; -	BTRFS_I(inode)->block_group = -			btrfs_find_block_group(root, 0, alloc_hint, owner);  	key[0].objectid = objectid;  	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4585,7 +4484,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  		goto fail;  	inode_init_owner(inode, dir, mode); -	inode->i_ino = objectid;  	inode_set_bytes(inode, 0);  	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;  	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], @@ -4649,29 +4547,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,  	int ret = 0;  	struct btrfs_key key;  	struct btrfs_root *root = BTRFS_I(parent_inode)->root; +	u64 ino = btrfs_ino(inode); +	u64 parent_ino = btrfs_ino(parent_inode); -	if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {  		memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));  	} else { -		key.objectid = inode->i_ino; +		key.objectid = ino;  		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);  		key.offset = 0;  	} -	if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {  		ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,  					 key.objectid, root->root_key.objectid, -					 parent_inode->i_ino, -					 index, name, name_len); +					 parent_ino, index, name, name_len);  	} else if (add_backref) { -		ret = btrfs_insert_inode_ref(trans, root, -					     name, name_len, inode->i_ino, -					     parent_inode->i_ino, index); +		ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino, +					     parent_ino, index);  	}  	if (ret == 0) {  		ret = btrfs_insert_dir_item(trans, root, name, name_len, -					    parent_inode->i_ino, &key, +					    parent_inode, &key,  		
			    btrfs_inode_type(inode), index);  		BUG_ON(ret); @@ -4714,10 +4612,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  	if (!new_valid_dev(rdev))  		return -EINVAL; -	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); -	if (err) -		return err; -  	/*  	 * 2 for inode item and ref  	 * 2 for dir items @@ -4727,11 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); +	err = btrfs_find_free_ino(root, &objectid); +	if (err) +		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, -				dentry->d_name.len, dir->i_ino, objectid, -				BTRFS_I(dir)->block_group, mode, &index); +				dentry->d_name.len, btrfs_ino(dir), objectid, +				mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -4743,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -4752,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  		init_special_inode(inode, inode->i_mode, rdev);  		btrfs_update_inode(trans, root, inode);  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_unlock:  	nr = trans->blocks_used;  	btrfs_end_transaction_throttle(trans, root); @@ -4777,9 +4670,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  	u64 objectid;  	u64 index = 0; -	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); -	if (err) -		return err;  	/*  	 * 2 for inode item and ref  	 * 2 for dir items @@ -4789,11 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); +	err = btrfs_find_free_ino(root, 
&objectid); +	if (err) +		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, -				dentry->d_name.len, dir->i_ino, objectid, -				BTRFS_I(dir)->block_group, mode, &index); +				dentry->d_name.len, btrfs_ino(dir), objectid, +				mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -4805,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -4816,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,  		inode->i_op = &btrfs_file_inode_operations;  		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_unlock:  	nr = trans->blocks_used;  	btrfs_end_transaction_throttle(trans, root); @@ -4864,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  	btrfs_inc_nlink(inode);  	inode->i_ctime = CURRENT_TIME; - -	btrfs_set_trans_block_group(trans, dir);  	ihold(inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); @@ -4874,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  		drop_inode = 1;  	} else {  		struct dentry *parent = dget_parent(dentry); -		btrfs_update_inode_block_group(trans, dir);  		err = btrfs_update_inode(trans, root, inode);  		BUG_ON(err);  		btrfs_log_new_name(trans, inode, NULL, parent); @@ -4903,10 +4789,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	u64 index = 0;  	unsigned long nr = 1; -	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); -	if (err) -		return err; -  	/*  	 * 2 items for inode and ref  	 * 2 items for dir items @@ -4915,12 +4797,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	trans = 
btrfs_start_transaction(root, 5);  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); + +	err = btrfs_find_free_ino(root, &objectid); +	if (err) +		goto out_fail;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, -				dentry->d_name.len, dir->i_ino, objectid, -				BTRFS_I(dir)->block_group, S_IFDIR | mode, -				&index); +				dentry->d_name.len, btrfs_ino(dir), objectid, +				S_IFDIR | mode, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_fail; @@ -4934,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	inode->i_op = &btrfs_dir_inode_operations;  	inode->i_fop = &btrfs_dir_file_operations; -	btrfs_set_trans_block_group(trans, inode);  	btrfs_i_size_write(inode, 0);  	err = btrfs_update_inode(trans, root, inode); @@ -4948,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)  	d_instantiate(dentry, inode);  	drop_on_err = 0; -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  out_fail:  	nr = trans->blocks_used; @@ -5041,7 +4922,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,  	u64 bytenr;  	u64 extent_start = 0;  	u64 extent_end = 0; -	u64 objectid = inode->i_ino; +	u64 objectid = btrfs_ino(inode);  	u32 found_type;  	struct btrfs_path *path = NULL;  	struct btrfs_root *root = BTRFS_I(inode)->root; @@ -5069,7 +4950,7 @@ again:  		else  			goto out;  	} -	em = alloc_extent_map(GFP_NOFS); +	em = alloc_extent_map();  	if (!em) {  		err = -ENOMEM;  		goto out; @@ -5082,7 +4963,15 @@ again:  	if (!path) {  		path = btrfs_alloc_path(); -		BUG_ON(!path); +		if (!path) { +			err = -ENOMEM; +			goto out; +		} +		/* +		 * Chances are we'll be called again, so go ahead and do +		 * readahead +		 */ +		path->reada = 1;  	}  	ret = btrfs_lookup_file_extent(trans, root, path, @@ -5223,8 +5112,10 @@ again:  				kunmap(page);  				free_extent_map(em);  				em = 
NULL; -				btrfs_release_path(root, path); -				trans = btrfs_join_transaction(root, 1); + +				btrfs_release_path(path); +				trans = btrfs_join_transaction(root); +  				if (IS_ERR(trans))  					return ERR_CAST(trans);  				goto again; @@ -5249,7 +5140,7 @@ not_found_em:  	em->block_start = EXTENT_MAP_HOLE;  	set_bit(EXTENT_FLAG_VACANCY, &em->flags);  insert: -	btrfs_release_path(root, path); +	btrfs_release_path(path);  	if (em->start > start || extent_map_end(em) <= start) {  		printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "  		       "[%llu %llu]\n", (unsigned long long)em->start, @@ -5382,7 +5273,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag  		u64 hole_start = start;  		u64 hole_len = len; -		em = alloc_extent_map(GFP_NOFS); +		em = alloc_extent_map();  		if (!em) {  			err = -ENOMEM;  			goto out; @@ -5468,10 +5359,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,  		btrfs_drop_extent_cache(inode, start, start + len - 1, 0);  	} -	trans = btrfs_join_transaction(root, 0); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans))  		return ERR_CAST(trans); +	if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) +		btrfs_add_inode_defrag(trans, inode); +  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	alloc_hint = get_extent_allocation_hint(inode, start, len); @@ -5483,7 +5377,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,  	}  	if (!em) { -		em = alloc_extent_map(GFP_NOFS); +		em = alloc_extent_map();  		if (!em) {  			em = ERR_PTR(-ENOMEM);  			goto out; @@ -5549,7 +5443,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,  	if (!path)  		return -ENOMEM; -	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, +	ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),  				       offset, 0);  	if (ret < 0)  		goto out; @@ -5566,7 +5460,7 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle *trans,  	ret = 0;  	leaf = path->nodes[0];  	btrfs_item_key_to_cpu(leaf, &key, slot); -	if (key.objectid != inode->i_ino || +	if (key.objectid != btrfs_ino(inode) ||  	    key.type != BTRFS_EXTENT_DATA_KEY) {  		/* not our file or wrong item type, must cow */  		goto out; @@ -5600,7 +5494,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,  	 * look for other files referencing this extent, if we  	 * find any we must cow  	 */ -	if (btrfs_cross_ref_exist(trans, root, inode->i_ino, +	if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),  				  key.offset - backref_offset, disk_bytenr))  		goto out; @@ -5701,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  		 * to make sure the current transaction stays open  		 * while we look for nocow cross refs  		 */ -		trans = btrfs_join_transaction(root, 0); +		trans = btrfs_join_transaction(root);  		if (IS_ERR(trans))  			goto must_cow; @@ -5790,9 +5684,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)  			flush_dcache_page(bvec->bv_page);  			if (csum != *private) { -				printk(KERN_ERR "btrfs csum failed ino %lu off" +				printk(KERN_ERR "btrfs csum failed ino %llu off"  				      " %llu csum %u private %u\n", -				      inode->i_ino, (unsigned long long)start, +				      (unsigned long long)btrfs_ino(inode), +				      (unsigned long long)start,  				      csum, *private);  				err = -EIO;  			} @@ -5839,7 +5734,7 @@ again:  	BUG_ON(!ordered); -	trans = btrfs_join_transaction(root, 1); +	trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) {  		err = -ENOMEM;  		goto out; @@ -5939,9 +5834,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)  	struct btrfs_dio_private *dip = bio->bi_private;  	if (err) { -		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " +		printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "  		      "sector %#Lx len %u err no %d\n", -		      dip->inode->i_ino, bio->bi_rw, +		
      (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,  		      (unsigned long long)bio->bi_sector, bio->bi_size, err);  		dip->errors = 1; @@ -6589,6 +6484,7 @@ out:  static int btrfs_truncate(struct inode *inode)  {  	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct btrfs_block_rsv *rsv;  	int ret;  	int err = 0;  	struct btrfs_trans_handle *trans; @@ -6602,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode)  	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);  	btrfs_ordered_update_i_size(inode, inode->i_size, NULL); -	trans = btrfs_start_transaction(root, 5); -	if (IS_ERR(trans)) -		return PTR_ERR(trans); +	/* +	 * Yes ladies and gentelment, this is indeed ugly.  The fact is we have +	 * 3 things going on here +	 * +	 * 1) We need to reserve space for our orphan item and the space to +	 * delete our orphan item.  Lord knows we don't want to have a dangling +	 * orphan item because we didn't reserve space to remove it. +	 * +	 * 2) We need to reserve space to update our inode. +	 * +	 * 3) We need to have something to cache all the space that is going to +	 * be free'd up by the truncate operation, but also have some slack +	 * space reserved in case it uses space during the truncate (thank you +	 * very much snapshotting). +	 * +	 * And we need these to all be seperate.  The fact is we can use alot of +	 * space doing the truncate, and we have no earthly idea how much space +	 * we will use, so we need the truncate reservation to be seperate so it +	 * doesn't end up using space reserved for updating the inode or +	 * removing the orphan item.  We also need to be able to stop the +	 * transaction and start a new one, which means we need to be able to +	 * update the inode several times, and we have no idea of knowing how +	 * many times that will be, so we can't just reserve 1 item for the +	 * entirety of the opration, so that has to be done seperately as well. 
+	 * Then there is the orphan item, which does indeed need to be held on +	 * to for the whole operation, and we need nobody to touch this reserved +	 * space except the orphan code. +	 * +	 * So that leaves us with +	 * +	 * 1) root->orphan_block_rsv - for the orphan deletion. +	 * 2) rsv - for the truncate reservation, which we will steal from the +	 * transaction reservation. +	 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for +	 * updating the inode. +	 */ +	rsv = btrfs_alloc_block_rsv(root); +	if (!rsv) +		return -ENOMEM; +	btrfs_add_durable_block_rsv(root->fs_info, rsv); -	btrfs_set_trans_block_group(trans, inode); +	trans = btrfs_start_transaction(root, 4); +	if (IS_ERR(trans)) { +		err = PTR_ERR(trans); +		goto out; +	} + +	/* +	 * Reserve space for the truncate process.  Truncate should be adding +	 * space, but if there are snapshots it may end up using space. +	 */ +	ret = btrfs_truncate_reserve_metadata(trans, root, rsv); +	BUG_ON(ret);  	ret = btrfs_orphan_add(trans, inode);  	if (ret) {  		btrfs_end_transaction(trans, root); -		return ret; +		goto out;  	}  	nr = trans->blocks_used;  	btrfs_end_transaction(trans, root);  	btrfs_btree_balance_dirty(root, nr); -	/* Now start a transaction for the truncate */ -	trans = btrfs_start_transaction(root, 0); -	if (IS_ERR(trans)) -		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, inode); -	trans->block_rsv = root->orphan_block_rsv; +	/* +	 * Ok so we've already migrated our bytes over for the truncate, so here +	 * just reserve the one slot we need for updating the inode. 
+	 */ +	trans = btrfs_start_transaction(root, 1); +	if (IS_ERR(trans)) { +		err = PTR_ERR(trans); +		goto out; +	} +	trans->block_rsv = rsv;  	/*  	 * setattr is responsible for setting the ordered_data_close flag, @@ -6647,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode)  	while (1) {  		if (!trans) { -			trans = btrfs_start_transaction(root, 0); -			if (IS_ERR(trans)) -				return PTR_ERR(trans); -			btrfs_set_trans_block_group(trans, inode); -			trans->block_rsv = root->orphan_block_rsv; -		} +			trans = btrfs_start_transaction(root, 3); +			if (IS_ERR(trans)) { +				err = PTR_ERR(trans); +				goto out; +			} -		ret = btrfs_block_rsv_check(trans, root, -					    root->orphan_block_rsv, 0, 5); -		if (ret == -EAGAIN) { -			ret = btrfs_commit_transaction(trans, root); -			if (ret) -				return ret; -			trans = NULL; -			continue; -		} else if (ret) { -			err = ret; -			break; +			ret = btrfs_truncate_reserve_metadata(trans, root, +							      rsv); +			BUG_ON(ret); + +			trans->block_rsv = rsv;  		}  		ret = btrfs_truncate_inode_items(trans, root, inode, @@ -6675,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode)  			break;  		} +		trans->block_rsv = &root->fs_info->trans_block_rsv;  		ret = btrfs_update_inode(trans, root, inode);  		if (ret) {  			err = ret; @@ -6688,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode)  	}  	if (ret == 0 && inode->i_nlink > 0) { +		trans->block_rsv = root->orphan_block_rsv;  		ret = btrfs_orphan_del(trans, inode);  		if (ret)  			err = ret; @@ -6699,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode)  		ret = btrfs_orphan_del(NULL, inode);  	} +	trans->block_rsv = &root->fs_info->trans_block_rsv;  	ret = btrfs_update_inode(trans, root, inode);  	if (ret && !err)  		err = ret;  	nr = trans->blocks_used;  	ret = btrfs_end_transaction_throttle(trans, root); +	btrfs_btree_balance_dirty(root, nr); + +out: +	btrfs_free_block_rsv(root, rsv); +  	if (ret && !err)  		err = ret; -	
btrfs_btree_balance_dirty(root, nr);  	return err;  } @@ -6716,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode)   * create a new subvolume directory/inode (helper for the ioctl).   */  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, -			     struct btrfs_root *new_root, -			     u64 new_dirid, u64 alloc_hint) +			     struct btrfs_root *new_root, u64 new_dirid)  {  	struct inode *inode;  	int err;  	u64 index = 0;  	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, -				new_dirid, alloc_hint, S_IFDIR | 0700, &index); +				new_dirid, S_IFDIR | 0700, &index);  	if (IS_ERR(inode))  		return PTR_ERR(inode);  	inode->i_op = &btrfs_dir_inode_operations; @@ -6782,12 +6729,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)  	ei->ordered_data_close = 0;  	ei->orphan_meta_reserved = 0;  	ei->dummy_inode = 0; +	ei->in_defrag = 0;  	ei->force_compress = BTRFS_COMPRESS_NONE; +	ei->delayed_node = NULL; +  	inode = &ei->vfs_inode; -	extent_map_tree_init(&ei->extent_tree, GFP_NOFS); -	extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); -	extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); +	extent_map_tree_init(&ei->extent_tree); +	extent_io_tree_init(&ei->io_tree, &inode->i_data); +	extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);  	mutex_init(&ei->log_mutex);  	btrfs_ordered_inode_tree_init(&ei->ordered_tree);  	INIT_LIST_HEAD(&ei->i_orphan); @@ -6834,25 +6784,10 @@ void btrfs_destroy_inode(struct inode *inode)  		spin_unlock(&root->fs_info->ordered_extent_lock);  	} -	if (root == root->fs_info->tree_root) { -		struct btrfs_block_group_cache *block_group; - -		block_group = btrfs_lookup_block_group(root->fs_info, -						BTRFS_I(inode)->block_group); -		if (block_group && block_group->inode == inode) { -			spin_lock(&block_group->lock); -			block_group->inode = NULL; -			spin_unlock(&block_group->lock); -			btrfs_put_block_group(block_group); -		} else if (block_group) { -			
btrfs_put_block_group(block_group); -		} -	} -  	spin_lock(&root->orphan_lock);  	if (!list_empty(&BTRFS_I(inode)->i_orphan)) { -		printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", -		       inode->i_ino); +		printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", +		       (unsigned long long)btrfs_ino(inode));  		list_del_init(&BTRFS_I(inode)->i_orphan);  	}  	spin_unlock(&root->orphan_lock); @@ -6874,6 +6809,7 @@ void btrfs_destroy_inode(struct inode *inode)  	inode_tree_del(inode);  	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);  free: +	btrfs_remove_delayed_node(inode);  	call_rcu(&inode->i_rcu, btrfs_i_callback);  } @@ -6882,7 +6818,7 @@ int btrfs_drop_inode(struct inode *inode)  	struct btrfs_root *root = BTRFS_I(inode)->root;  	if (btrfs_root_refs(&root->root_item) == 0 && -	    root != root->fs_info->tree_root) +	    !is_free_space_inode(root, inode))  		return 1;  	else  		return generic_drop_inode(inode); @@ -6991,16 +6927,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	u64 index = 0;  	u64 root_objectid;  	int ret; +	u64 old_ino = btrfs_ino(old_inode); -	if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) +	if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)  		return -EPERM;  	/* we only allow rename subvolume link between subvolumes */ -	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) +	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)  		return -EXDEV; -	if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || -	    (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) +	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || +	    (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))  		return -ENOTEMPTY;  	if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -7016,7 +6953,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  		filemap_flush(old_inode->i_mapping);  	/* close the racy window with snapshot 
create/destroy ioctl */ -	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) +	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)  		down_read(&root->fs_info->subvol_sem);  	/*  	 * We want to reserve the absolute worst case amount of items.  So if @@ -7032,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,                  goto out_notrans;          } -	btrfs_set_trans_block_group(trans, new_dir); -  	if (dest != root)  		btrfs_record_root_in_trans(trans, dest); @@ -7041,15 +6976,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (ret)  		goto out_fail; -	if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {  		/* force full log commit if subvolume involved. */  		root->fs_info->last_trans_log_full_commit = trans->transid;  	} else {  		ret = btrfs_insert_inode_ref(trans, dest,  					     new_dentry->d_name.name,  					     new_dentry->d_name.len, -					     old_inode->i_ino, -					     new_dir->i_ino, index); +					     old_ino, +					     btrfs_ino(new_dir), index);  		if (ret)  			goto out_fail;  		/* @@ -7065,10 +7000,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	 * make sure the inode gets flushed if it is replacing  	 * something.  	 
*/ -	if (new_inode && new_inode->i_size && -	    old_inode && S_ISREG(old_inode->i_mode)) { +	if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))  		btrfs_add_ordered_operation(trans, root, old_inode); -	}  	old_dir->i_ctime = old_dir->i_mtime = ctime;  	new_dir->i_ctime = new_dir->i_mtime = ctime; @@ -7077,7 +7010,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (old_dentry->d_parent != new_dentry->d_parent)  		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); -	if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {  		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;  		ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,  					old_dentry->d_name.name, @@ -7094,7 +7027,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (new_inode) {  		new_inode->i_ctime = CURRENT_TIME; -		if (unlikely(new_inode->i_ino == +		if (unlikely(btrfs_ino(new_inode) ==  			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {  			root_objectid = BTRFS_I(new_inode)->location.objectid;  			ret = btrfs_unlink_subvol(trans, dest, new_dir, @@ -7122,7 +7055,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  			     new_dentry->d_name.len, 0, index);  	BUG_ON(ret); -	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { +	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {  		struct dentry *parent = dget_parent(new_dentry);  		btrfs_log_new_name(trans, old_inode, old_dir, parent);  		dput(parent); @@ -7131,7 +7064,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  out_fail:  	btrfs_end_transaction_throttle(trans, root);  out_notrans: -	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) +	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)  		up_read(&root->fs_info->subvol_sem);  	return ret; @@ -7185,58 +7118,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)  	return 0;  } 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput, -				   int sync) -{ -	struct btrfs_inode *binode; -	struct inode *inode = NULL; - -	spin_lock(&root->fs_info->delalloc_lock); -	while (!list_empty(&root->fs_info->delalloc_inodes)) { -		binode = list_entry(root->fs_info->delalloc_inodes.next, -				    struct btrfs_inode, delalloc_inodes); -		inode = igrab(&binode->vfs_inode); -		if (inode) { -			list_move_tail(&binode->delalloc_inodes, -				       &root->fs_info->delalloc_inodes); -			break; -		} - -		list_del_init(&binode->delalloc_inodes); -		cond_resched_lock(&root->fs_info->delalloc_lock); -	} -	spin_unlock(&root->fs_info->delalloc_lock); - -	if (inode) { -		if (sync) { -			filemap_write_and_wait(inode->i_mapping); -			/* -			 * We have to do this because compression doesn't -			 * actually set PG_writeback until it submits the pages -			 * for IO, which happens in an async thread, so we could -			 * race and not actually wait for any writeback pages -			 * because they've not been submitted yet.  Technically -			 * this could still be the case for the ordered stuff -			 * since the async thread may not have started to do its -			 * work yet.  If this becomes the case then we need to -			 * figure out a way to make sure that in writepage we -			 * wait for any async pages to be submitted before -			 * returning so that fdatawait does what its supposed to -			 * do. 
-			 */ -			btrfs_wait_ordered_range(inode, 0, (u64)-1); -		} else { -			filemap_flush(inode->i_mapping); -		} -		if (delay_iput) -			btrfs_add_delayed_iput(inode); -		else -			iput(inode); -		return 1; -	} -	return 0; -} -  static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  			 const char *symname)  { @@ -7260,9 +7141,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))  		return -ENAMETOOLONG; -	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); -	if (err) -		return err;  	/*  	 * 2 items for inode item and ref  	 * 2 items for dir items @@ -7272,12 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  	if (IS_ERR(trans))  		return PTR_ERR(trans); -	btrfs_set_trans_block_group(trans, dir); +	err = btrfs_find_free_ino(root, &objectid); +	if (err) +		goto out_unlock;  	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, -				dentry->d_name.len, dir->i_ino, objectid, -				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, -				&index); +				dentry->d_name.len, btrfs_ino(dir), objectid, +				S_IFLNK|S_IRWXUGO, &index);  	if (IS_ERR(inode)) {  		err = PTR_ERR(inode);  		goto out_unlock; @@ -7289,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  		goto out_unlock;  	} -	btrfs_set_trans_block_group(trans, inode);  	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);  	if (err)  		drop_inode = 1; @@ -7300,14 +7178,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  		inode->i_op = &btrfs_file_inode_operations;  		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;  	} -	btrfs_update_inode_block_group(trans, inode); -	btrfs_update_inode_block_group(trans, dir);  	if (drop_inode)  		goto out_unlock;  	path = btrfs_alloc_path();  	BUG_ON(!path); -	key.objectid = inode->i_ino; +	key.objectid = btrfs_ino(inode);  	key.offset = 0;  	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);  	
datasize = btrfs_file_extent_calc_inline_size(name_len); @@ -7315,6 +7191,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  				      datasize);  	if (err) {  		drop_inode = 1; +		btrfs_free_path(path);  		goto out_unlock;  	}  	leaf = path->nodes[0];  | 
