Diffstat (limited to 'fs/btrfs/disk-io.c')
 fs/btrfs/disk-io.c | 129 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 94 insertions(+), 35 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6d776717d8b3..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -279,6 +279,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
 	len = buf->len - offset;
 	while (len > 0) {
+		/*
+		 * Note: we don't need to check for the err == 1 case here, as
+		 * with the given combination of 'start = BTRFS_CSUM_SIZE (32)'
+		 * and 'min_len = 32' and the currently implemented mapping
+		 * algorithm we cannot cross a page boundary.
+		 */
 		err = map_private_extent_buffer(buf, offset, 32,
 					&kaddr, &map_start, &map_len);
 		if (err)
@@ -542,7 +548,7 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
 	if (WARN_ON(!PageUptodate(page)))
 		return -EUCLEAN;
 
-	ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
+	ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
 			btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
 
 	return csum_tree_block(fs_info, eb, 0);
@@ -557,7 +563,20 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
 	read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
 
 	while (fs_devices) {
-		if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
+		u8 *metadata_uuid;
+
+		/*
+		 * Checking the incompat flag is only valid for the current
+		 * fs. For seed devices it's forbidden to have their uuid
+		 * changed so reading ->fsid in this case is fine
+		 */
+		if (fs_devices == fs_info->fs_devices &&
+		    btrfs_fs_incompat(fs_info, METADATA_UUID))
+			metadata_uuid = fs_devices->metadata_uuid;
+		else
+			metadata_uuid = fs_devices->fsid;
+
+		if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE)) {
 			ret = 0;
 			break;
 		}
@@ -660,19 +679,6 @@ out:
 	return ret;
 }
 
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
-{
-	struct extent_buffer *eb;
-
-	eb = (struct extent_buffer *)page->private;
-	set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
-	eb->read_mirror = failed_mirror;
-	atomic_dec(&eb->io_pages);
-	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-		btree_readahead_hook(eb, -EIO);
-	return -EIO;	/* we fixed nothing */
-}
-
 static void end_workqueue_bio(struct bio *bio)
 {
 	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
@@ -751,11 +757,22 @@ static void run_one_async_start(struct btrfs_work *work)
 		async->status = ret;
 }
 
+/*
+ * In order to insert checksums into the metadata in large chunks, we wait
+ * until bio submission time.   All the pages in the bio are checksummed and
+ * sums are attached onto the ordered extent record.
+ *
+ * At IO completion time the csums attached on the ordered extent record are
+ * inserted into the tree.
+ */
 static void run_one_async_done(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
+	struct inode *inode;
+	blk_status_t ret;
 
 	async = container_of(work, struct async_submit_bio, work);
+	inode = async->private_data;
 
 	/* If an error occurred we just want to clean up the bio and move on */
 	if (async->status) {
@@ -764,7 +781,12 @@ static void run_one_async_done(struct btrfs_work *work)
 		return;
 	}
 
-	btrfs_submit_bio_done(async->private_data, async->bio, async->mirror_num);
+	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio,
+			async->mirror_num, 1);
+	if (ret) {
+		async->bio->bi_status = ret;
+		bio_endio(async->bio);
+	}
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -1178,6 +1200,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshotted, 0);
 	atomic_set(&root->snapshot_force_cow, 0);
+	atomic_set(&root->nr_swapfiles, 0);
 	root->log_transid = 0;
 	root->log_transid_committed = -1;
 	root->last_log_commit = 0;
@@ -1659,6 +1682,8 @@ static int cleaner_kthread(void *arg)
 	while (1) {
 		again = 0;
 
+		set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+
 		/* Make the cleaner go to sleep early. */
 		if (btrfs_need_cleaner_sleep(fs_info))
 			goto sleep;
@@ -1705,6 +1730,7 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
+		clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
 		if (kthread_should_park())
 			kthread_parkme();
 		if (kthread_should_stop())
@@ -2118,10 +2144,8 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
 static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
 {
 	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
-	rwlock_init(&fs_info->dev_replace.lock);
-	atomic_set(&fs_info->dev_replace.blocking_readers, 0);
+	init_rwsem(&fs_info->dev_replace.rwsem);
 	init_waitqueue_head(&fs_info->dev_replace.replace_wait);
-	init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
 }
 
 static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2442,10 +2466,11 @@ static int validate_super(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 
-	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+	if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
+		   BTRFS_FSID_SIZE) != 0) {
 		btrfs_err(fs_info,
-			   "dev_item UUID does not match fsid: %pU != %pU",
-			   fs_info->fsid, sb->dev_item.fsid);
+			"dev_item UUID does not match metadata fsid: %pU != %pU",
+			fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
 		ret = -EINVAL;
 	}
 
@@ -2656,6 +2681,9 @@ int open_ctree(struct super_block *sb,
 	btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
 	btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
 			     BTRFS_BLOCK_RSV_DELOPS);
+	btrfs_init_block_rsv(&fs_info->delayed_refs_rsv,
+			     BTRFS_BLOCK_RSV_DELREFS);
+
 	atomic_set(&fs_info->async_delalloc_pages, 0);
 	atomic_set(&fs_info->defrag_running, 0);
 	atomic_set(&fs_info->qgroup_op_seq, 0);
@@ -2745,6 +2773,9 @@ int open_ctree(struct super_block *sb,
 	fs_info->sectorsize = 4096;
 	fs_info->stripesize = 4096;
 
+	spin_lock_init(&fs_info->swapfile_pins_lock);
+	fs_info->swapfile_pins = RB_ROOT;
+
 	ret = btrfs_alloc_stripe_hash_table(fs_info);
 	if (ret) {
 		err = ret;
@@ -2781,11 +2812,29 @@ int open_ctree(struct super_block *sb,
 	 * the whole block of INFO_SIZE
 	 */
 	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
-	memcpy(fs_info->super_for_commit, fs_info->super_copy,
-	       sizeof(*fs_info->super_for_commit));
 	brelse(bh);
 
-	memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
+	disk_super = fs_info->super_copy;
+
+	ASSERT(!memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+		       BTRFS_FSID_SIZE));
+
+	if (btrfs_fs_incompat(fs_info, METADATA_UUID)) {
+		ASSERT(!memcmp(fs_info->fs_devices->metadata_uuid,
+				fs_info->super_copy->metadata_uuid,
+				BTRFS_FSID_SIZE));
+	}
+
+	features = btrfs_super_flags(disk_super);
+	if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
+		features &= ~BTRFS_SUPER_FLAG_CHANGING_FSID_V2;
+		btrfs_set_super_flags(disk_super, features);
+		btrfs_info(fs_info,
+			"found metadata UUID change in progress flag, clearing");
+	}
+
+	memcpy(fs_info->super_for_commit, fs_info->super_copy,
+	       sizeof(*fs_info->super_for_commit));
 
 	ret = btrfs_validate_mount_super(fs_info);
 	if (ret) {
@@ -2794,7 +2843,6 @@ int open_ctree(struct super_block *sb,
 		goto fail_alloc;
 	}
 
-	disk_super = fs_info->super_copy;
 
 	if (!btrfs_super_root(disk_super))
 		goto fail_alloc;
@@ -2906,7 +2954,7 @@ int open_ctree(struct super_block *sb,
 
 	sb->s_blocksize = sectorsize;
 	sb->s_blocksize_bits = blksize_bits(sectorsize);
-	memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
+	memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
 
 	mutex_lock(&fs_info->chunk_mutex);
 	ret = btrfs_read_sys_array(fs_info);
@@ -3055,7 +3103,7 @@ retry_root_backup:
 
 	if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
 		btrfs_warn(fs_info,
-		"writeable mount is not allowed due to too many missing devices");
+		"writable mount is not allowed due to too many missing devices");
 		goto fail_sysfs;
 	}
 
@@ -3724,7 +3772,8 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
 		btrfs_set_stack_device_io_width(dev_item, dev->io_width);
 		btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
 		memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
-		memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_FSID_SIZE);
+		memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid,
+		       BTRFS_FSID_SIZE);
 
 		flags = btrfs_super_flags(sb);
 		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
@@ -4031,7 +4080,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 	/*
 	 * This is a fast path so only do this check if we have sanity tests
-	 * enabled.  Normal people shouldn't be using umapped buffers as dirty
+	 * enabled.  Normal people shouldn't be using unmapped buffers as dirty
 	 * outside of the sanity tests.
 	 */
 	if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
@@ -4155,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
 		spin_lock(&fs_info->ordered_root_lock);
 	}
 	spin_unlock(&fs_info->ordered_root_lock);
+
+	/*
+	 * We need this here because if we've been flipped read-only we won't
+	 * get sync() from the umount, so we need to make sure any ordered
+	 * extents that haven't had their dirty pages IO start writeout yet
+	 * actually get run and error out properly.
+	 */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 }
 
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4219,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		if (pin_bytes)
 			btrfs_pin_extent(fs_info, head->bytenr,
 					 head->num_bytes, 1);
+		btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 		btrfs_put_delayed_ref_head(head);
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
@@ -4329,6 +4387,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 	unpin = pinned_extents;
again:
 	while (1) {
+		struct extent_state *cached_state = NULL;
+
 		/*
 		 * The btrfs_finish_extent_commit() may get the same range as
 		 * ours between find_first_extent_bit and clear_extent_dirty.
@@ -4337,13 +4397,14 @@ again:
 		 */
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
-					    EXTENT_DIRTY, NULL);
+					    EXTENT_DIRTY, &cached_state);
 		if (ret) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
 		}
 
-		clear_extent_dirty(unpin, start, end);
+		clear_extent_dirty(unpin, start, end, &cached_state);
+		free_extent_state(cached_state);
 		btrfs_error_unpin_extent_range(fs_info, start, end);
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
@@ -4400,6 +4461,7 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
 		spin_unlock(&cur_trans->dirty_bgs_lock);
 		btrfs_put_block_group(cache);
+		btrfs_delayed_refs_rsv_release(fs_info, 1);
 		spin_lock(&cur_trans->dirty_bgs_lock);
 	}
 	spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -4505,7 +4567,4 @@ static const struct extent_io_ops btree_extent_io_ops = {
 	/* mandatory callbacks */
 	.submit_bio_hook = btree_submit_bio_hook,
 	.readpage_end_io_hook = btree_readpage_end_io_hook,
-	.readpage_io_failed_hook = btree_io_failed_hook,
-
-	/* optional callbacks */
 };
