Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--	fs/btrfs/scrub.c | 396 ++++++++++++++++++++++++++++++------------
1 file changed, 276 insertions(+), 120 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 94db0fa5225a..a39f5d1144e8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -125,6 +125,7 @@ struct scrub_block {
 		/* It is for the data with checksum */
 		unsigned int	data_corrected:1;
 	};
+	struct btrfs_work	work;
 };
 
 /* Used for the chunks with parity stripe such RAID5/6 */
@@ -278,7 +279,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 		       u64 physical, struct btrfs_device *dev, u64 flags,
 		       u64 gen, int mirror_num, u8 *csum, int force,
 		       u64 physical_for_dev_replace);
-static void scrub_bio_end_io(struct bio *bio, int err);
+static void scrub_bio_end_io(struct bio *bio);
 static void scrub_bio_end_io_worker(struct btrfs_work *work);
 static void scrub_block_complete(struct scrub_block *sblock);
 static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
@@ -295,7 +296,7 @@ static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
 static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 				    struct scrub_page *spage);
 static void scrub_wr_submit(struct scrub_ctx *sctx);
-static void scrub_wr_bio_end_io(struct bio *bio, int err);
+static void scrub_wr_bio_end_io(struct bio *bio);
 static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
 static int write_page_nocow(struct scrub_ctx *sctx,
 			    u64 physical_for_dev_replace, struct page *page);
@@ -332,11 +333,14 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
 	}
 }
 
-static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+static void scrub_pause_on(struct btrfs_fs_info *fs_info)
 {
 	atomic_inc(&fs_info->scrubs_paused);
 	wake_up(&fs_info->scrub_pause_wait);
+}
 
+static void scrub_pause_off(struct btrfs_fs_info *fs_info)
+{
 	mutex_lock(&fs_info->scrub_lock);
 	__scrub_blocked_if_needed(fs_info);
 	atomic_dec(&fs_info->scrubs_paused);
@@ -345,6 +349,12 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
 	wake_up(&fs_info->scrub_pause_wait);
 }
 
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+	scrub_pause_on(fs_info);
+	scrub_pause_off(fs_info);
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
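
The scrub_pause_on()/scrub_pause_off() split exists so a caller can remain in the "paused" state across a blocking call, instead of only toggling it atomically the way scrub_blocked_if_needed() does. A minimal sketch of the intended pairing; do_blocking_work() is a hypothetical stand-in for any call that may wait on a transaction commit:

	/* Count this scrub as paused before blocking, so a concurrent
	 * transaction commit does not wait on us while we wait on it. */
	scrub_pause_on(fs_info);
	do_blocking_work();		/* hypothetical blocking call */
	scrub_pause_off(fs_info);	/* resume normal accounting */
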
@@ -454,27 +464,14 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	struct scrub_ctx *sctx;
 	int		i;
 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
-	int pages_per_rd_bio;
 	int ret;
 
-	/*
-	 * the setting of pages_per_rd_bio is correct for scrub but might
-	 * be wrong for the dev_replace code where we might read from
-	 * different devices in the initial huge bios. However, that
-	 * code is able to correctly handle the case when adding a page
-	 * to a bio fails.
-	 */
-	if (dev->bdev)
-		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
-					 bio_get_nr_vecs(dev->bdev));
-	else
-		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
 	if (!sctx)
 		goto nomem;
 	atomic_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
-	sctx->pages_per_rd_bio = pages_per_rd_bio;
+	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx->curr = -1;
 	sctx->dev_root = dev->dev_root;
 	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
@@ -1429,11 +1426,11 @@ struct scrub_bio_ret {
 	int error;
 };
 
-static void scrub_bio_wait_endio(struct bio *bio, int error)
+static void scrub_bio_wait_endio(struct bio *bio)
 {
 	struct scrub_bio_ret *ret = bio->bi_private;
 
-	ret->error = error;
+	ret->error = bio->bi_error;
 	complete(&ret->event);
 }
 
@@ -1790,12 +1787,12 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
 	btrfsic_submit_bio(WRITE, sbio->bio);
 }
 
-static void scrub_wr_bio_end_io(struct bio *bio, int err)
+static void scrub_wr_bio_end_io(struct bio *bio)
 {
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
 
-	sbio->err = err;
+	sbio->err = bio->bi_error;
 	sbio->bio = bio;
 
 	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2087,21 +2084,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
 	sbio = sctx->bios[sctx->curr];
 	sctx->curr = -1;
 	scrub_pending_bio_inc(sctx);
-
-	if (!sbio->bio->bi_bdev) {
-		/*
-		 * this case should not happen. If btrfs_map_block() is
-		 * wrong, it could happen for dev-replace operations on
-		 * missing devices when no mirrors are available, but in
-		 * this case it should already fail the mount.
-		 * This case is handled correctly (but _very_ slowly).
-		 */
-		printk_ratelimited(KERN_WARNING
-			"BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
-		bio_endio(sbio->bio, -EIO);
-	} else {
-		btrfsic_submit_bio(READ, sbio->bio);
-	}
+	btrfsic_submit_bio(READ, sbio->bio);
 }
 
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
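
The end_io conversions above all follow the same block-layer API change: the completion callback no longer receives an error argument and reads bio->bi_error instead. A hedged before/after sketch with made-up names (my_ctx, my_end_io):

/* Before: the error code arrived as a parameter. */
static void my_end_io_old(struct bio *bio, int err)
{
	struct my_ctx *ctx = bio->bi_private;	/* hypothetical context */

	ctx->err = err;
}

/* After: the error code is carried on the bio itself. */
static void my_end_io_new(struct bio *bio)
{
	struct my_ctx *ctx = bio->bi_private;

	ctx->err = bio->bi_error;
}
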
@@ -2178,6 +2161,134 @@ again:
 	return 0;
 }
 
+static void scrub_missing_raid56_end_io(struct bio *bio)
+{
+	struct scrub_block *sblock = bio->bi_private;
+	struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info;
+
+	if (bio->bi_error)
+		sblock->no_io_error_seen = 0;
+
+	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
+}
+
+static void scrub_missing_raid56_worker(struct btrfs_work *work)
+{
+	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
+	struct scrub_ctx *sctx = sblock->sctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	unsigned int is_metadata;
+	unsigned int have_csum;
+	u8 *csum;
+	u64 generation;
+	u64 logical;
+	struct btrfs_device *dev;
+
+	is_metadata = !(sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA);
+	have_csum = sblock->pagev[0]->have_csum;
+	csum = sblock->pagev[0]->csum;
+	generation = sblock->pagev[0]->generation;
+	logical = sblock->pagev[0]->logical;
+	dev = sblock->pagev[0]->dev;
+
+	if (sblock->no_io_error_seen) {
+		scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
+					     have_csum, csum, generation,
+					     sctx->csum_size);
+	}
+
+	if (!sblock->no_io_error_seen) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.read_errors++;
+		spin_unlock(&sctx->stat_lock);
+		printk_ratelimited_in_rcu(KERN_ERR
+			"BTRFS: I/O error rebuilding logical %llu for dev %s\n",
+			logical, rcu_str_deref(dev->name));
+	} else if (sblock->header_error || sblock->checksum_error) {
+		spin_lock(&sctx->stat_lock);
+		sctx->stat.uncorrectable_errors++;
+		spin_unlock(&sctx->stat_lock);
+		printk_ratelimited_in_rcu(KERN_ERR
+			"BTRFS: failed to rebuild valid logical %llu for dev %s\n",
+			logical, rcu_str_deref(dev->name));
+	} else {
+		scrub_write_block_to_dev_replace(sblock);
+	}
+
+	scrub_block_put(sblock);
+
+	if (sctx->is_dev_replace &&
+	    atomic_read(&sctx->wr_ctx.flush_all_writes)) {
+		mutex_lock(&sctx->wr_ctx.wr_lock);
+		scrub_wr_submit(sctx);
+		mutex_unlock(&sctx->wr_ctx.wr_lock);
+	}
+
+	scrub_pending_bio_dec(sctx);
+}
+
+static void scrub_missing_raid56_pages(struct scrub_block *sblock)
+{
+	struct scrub_ctx *sctx = sblock->sctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	u64 length = sblock->page_count * PAGE_SIZE;
+	u64 logical = sblock->pagev[0]->logical;
+	struct btrfs_bio *bbio;
+	struct bio *bio;
+	struct btrfs_raid_bio *rbio;
+	int ret;
+	int i;
+
+	ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
+			       &bbio, 0, 1);
+	if (ret || !bbio || !bbio->raid_map)
+		goto bbio_out;
+
+	if (WARN_ON(!sctx->is_dev_replace ||
+		    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
+		/*
+		 * We shouldn't be scrubbing a missing device. Even for dev
+		 * replace, we should only get here for RAID 5/6. We either
+		 * managed to mount something with no mirrors remaining or
+		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
+		 */
+		goto bbio_out;
+	}
+
+	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
+	if (!bio)
+		goto bbio_out;
+
+	bio->bi_iter.bi_sector = logical >> 9;
+	bio->bi_private = sblock;
+	bio->bi_end_io = scrub_missing_raid56_end_io;
+
+	rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length);
+	if (!rbio)
+		goto rbio_out;
+
+	for (i = 0; i < sblock->page_count; i++) {
+		struct scrub_page *spage = sblock->pagev[i];
+
+		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
+	}
+
+	btrfs_init_work(&sblock->work, btrfs_scrub_helper,
+			scrub_missing_raid56_worker, NULL, NULL);
+	scrub_block_get(sblock);
+	scrub_pending_bio_inc(sctx);
+	raid56_submit_missing_rbio(rbio);
+	return;
+
+rbio_out:
+	bio_put(bio);
+bbio_out:
+	btrfs_put_bbio(bbio);
+	spin_lock(&sctx->stat_lock);
+	sctx->stat.malloc_errors++;
+	spin_unlock(&sctx->stat_lock);
+}
+
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 		       u64 physical, struct btrfs_device *dev, u64 flags,
 		       u64 gen, int mirror_num, u8 *csum, int force,
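
Note how scrub_missing_raid56_end_io() does almost nothing: bio completion runs in a context where checksum verification and writeback would be inappropriate, so it records the outcome and queues the rest onto scrub_workers. The shape of that hand-off, sketched with hypothetical names (my_block, my_verify, workers):

static void my_end_io(struct bio *bio)
{
	struct my_block *blk = bio->bi_private;

	if (bio->bi_error)
		blk->io_error = 1;		/* cheap: just record it */
	btrfs_queue_work(workers, &blk->work);	/* defer the real work */
}

static void my_worker(struct btrfs_work *work)
{
	struct my_block *blk = container_of(work, struct my_block, work);

	if (!blk->io_error)
		my_verify(blk);		/* expensive: checksum verification */
}
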
@@ -2241,31 +2352,39 @@ leave_nomem:
 	}
 
 	WARN_ON(sblock->page_count == 0);
-	for (index = 0; index < sblock->page_count; index++) {
-		struct scrub_page *spage = sblock->pagev[index];
-		int ret;
+	if (dev->missing) {
+		/*
+		 * This case should only be hit for RAID 5/6 device replace. See
+		 * the comment in scrub_missing_raid56_pages() for details.
+		 */
+		scrub_missing_raid56_pages(sblock);
+	} else {
+		for (index = 0; index < sblock->page_count; index++) {
+			struct scrub_page *spage = sblock->pagev[index];
+			int ret;
 
-		ret = scrub_add_page_to_rd_bio(sctx, spage);
-		if (ret) {
-			scrub_block_put(sblock);
-			return ret;
+			ret = scrub_add_page_to_rd_bio(sctx, spage);
+			if (ret) {
+				scrub_block_put(sblock);
+				return ret;
+			}
 		}
-	}
 
-	if (force)
-		scrub_submit(sctx);
+		if (force)
+			scrub_submit(sctx);
+	}
 
 	/* last one frees, either here or in bio completion for last page */
 	scrub_block_put(sblock);
 	return 0;
 }
 
-static void scrub_bio_end_io(struct bio *bio, int err)
+static void scrub_bio_end_io(struct bio *bio)
 {
 	struct scrub_bio *sbio = bio->bi_private;
 	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
 
-	sbio->err = err;
+	sbio->err = bio->bi_error;
 	sbio->bio = bio;
 
 	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2564,6 +2683,11 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
 	u8 csum[BTRFS_CSUM_SIZE];
 	u32 blocksize;
 
+	if (dev->missing) {
+		scrub_parity_mark_sectors_error(sparity, logical, len);
+		return 0;
+	}
+
 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
 		blocksize = sctx->sectorsize;
 	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
@@ -2672,11 +2796,11 @@ static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
 	scrub_pending_bio_dec(sctx);
 }
 
-static void scrub_parity_bio_endio(struct bio *bio, int error)
+static void scrub_parity_bio_endio(struct bio *bio)
 {
 	struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
 
-	if (error)
+	if (bio->bi_error)
 		bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 			  sparity->nsectors);
 
@@ -2702,7 +2826,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 			   sparity->nsectors))
 		goto out;
 
-	length = sparity->logic_end - sparity->logic_start + 1;
+	length = sparity->logic_end - sparity->logic_start;
 	ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
 			       sparity->logic_start,
 			       &length, &bbio, 0, 1);
@@ -2725,8 +2849,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 		goto rbio_out;
 
 	list_for_each_entry(spage, &sparity->spages, list)
-		raid56_parity_add_scrub_pages(rbio, spage->page,
-					      spage->logical);
+		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
 
 	scrub_pending_bio_inc(sctx);
 	raid56_parity_submit_scrub_rbio(rbio);
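
The "+ 1" removals in this and the later hunks settle on an exclusive logic_end, so a range's length is simply end minus start. A quick worked check, assuming a 64 KiB stripe that starts at logical offset 0:

	u64 logic_start = 0;
	u64 logic_end = 65536;			/* exclusive upper bound */
	u64 length = logic_end - logic_start;	/* 65536, exactly 64 KiB */

	/* An inclusive-end convention would use 65535 and add the 1 back;
	 * mixing the two conventions over- or under-counts by one byte. */
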
@@ -2774,6 +2897,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	struct btrfs_root *root = fs_info->extent_root;
 	struct btrfs_root *csum_root = fs_info->csum_root;
 	struct btrfs_extent_item *extent;
+	struct btrfs_bio *bbio = NULL;
 	u64 flags;
 	int ret;
 	int slot;
@@ -2783,6 +2907,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	u64 extent_logical;
 	u64 extent_physical;
 	u64 extent_len;
+	u64 mapped_length;
 	struct btrfs_device *extent_dev;
 	struct scrub_parity *sparity;
 	int nsectors;
@@ -2856,6 +2981,10 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 			}
 			btrfs_item_key_to_cpu(l, &key, slot);
 
+			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+			    key.type != BTRFS_METADATA_ITEM_KEY)
+				goto next;
+
 			if (key.type == BTRFS_METADATA_ITEM_KEY)
 				bytes = root->nodesize;
 			else
@@ -2864,11 +2993,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 			if (key.objectid + bytes <= logic_start)
 				goto next;
 
-			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
-			    key.type != BTRFS_METADATA_ITEM_KEY)
-				goto next;
-
-			if (key.objectid > logic_end) {
+			if (key.objectid >= logic_end) {
 				stop_loop = 1;
 				break;
 			}
@@ -2881,11 +3006,12 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 			flags = btrfs_extent_flags(l, extent);
 			generation = btrfs_extent_generation(l, extent);
 
-			if (key.objectid < logic_start &&
-			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
-				btrfs_err(fs_info,
-					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
-					   key.objectid, logic_start);
+			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+			    (key.objectid < logic_start ||
+			     key.objectid + bytes >
+			     logic_start + map->stripe_len)) {
+				btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+					  key.objectid, logic_start);
 				goto next;
 			}
 again:
@@ -2905,10 +3031,21 @@ again:
 			scrub_parity_mark_sectors_data(sparity, extent_logical,
 						       extent_len);
 
-			scrub_remap_extent(fs_info, extent_logical,
-					   extent_len, &extent_physical,
-					   &extent_dev,
-					   &extent_mirror_num);
+			mapped_length = extent_len;
+			ret = btrfs_map_block(fs_info, READ, extent_logical,
+					      &mapped_length, &bbio, 0);
+			if (!ret) {
+				if (!bbio || mapped_length < extent_len)
+					ret = -EIO;
+			}
+			if (ret) {
+				btrfs_put_bbio(bbio);
+				goto out;
+			}
+			extent_physical = bbio->stripes[0].physical;
+			extent_mirror_num = bbio->mirror_num;
+			extent_dev = bbio->stripes[0].dev;
+			btrfs_put_bbio(bbio);
 
 			ret = btrfs_lookup_csums_range(csum_root,
 						extent_logical,
@@ -2923,10 +3060,12 @@ again:
 						      extent_dev, flags,
 						      generation,
 						      extent_mirror_num);
+
+			scrub_free_csums(sctx);
+
 			if (ret)
 				goto out;
 
-			scrub_free_csums(sctx);
 			if (extent_logical + extent_len <
 			    key.objectid + bytes) {
 				logic_start += map->stripe_len;
@@ -2955,7 +3094,7 @@ next:
 out:
 	if (ret < 0)
 		scrub_parity_mark_sectors_error(sparity, logic_start,
-						logic_end - logic_start + 1);
+						logic_end - logic_start);
 	scrub_parity_put(sparity);
 	scrub_submit(sctx);
 	mutex_lock(&sctx->wr_ctx.wr_lock);
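
Replacing scrub_remap_extent() with a raw btrfs_map_block() lookup turns a short or empty mapping into an explicit -EIO instead of a silent remap. The defensive shape, pulled out as a hedged standalone sketch (lookup_first_stripe() is a made-up name, not a kernel function):

/* Map a logical range and validate it covers what the caller wanted;
 * treat a short mapping as an I/O error and always drop the bbio ref. */
static int lookup_first_stripe(struct btrfs_fs_info *fs_info, u64 logical,
			       u64 wanted_len, u64 *physical,
			       struct btrfs_device **dev, int *mirror)
{
	struct btrfs_bio *bbio = NULL;
	u64 mapped = wanted_len;
	int ret;

	ret = btrfs_map_block(fs_info, READ, logical, &mapped, &bbio, 0);
	if (!ret && (!bbio || mapped < wanted_len))
		ret = -EIO;			/* short or empty mapping */
	if (ret) {
		btrfs_put_bbio(bbio);		/* safe on NULL */
		return ret;
	}
	*physical = bbio->stripes[0].physical;
	*dev = bbio->stripes[0].dev;
	*mirror = bbio->mirror_num;
	btrfs_put_bbio(bbio);
	return 0;
}
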
@@ -3104,22 +3243,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	ret = 0;
 	while (physical < physical_end) {
-		/* for raid56, we skip parity stripe */
-		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-			ret = get_raid56_logic_offset(physical, num,
-					map, &logical, &stripe_logical);
-			logical += base;
-			if (ret) {
-				stripe_logical += base;
-				stripe_end = stripe_logical + increment - 1;
-				ret = scrub_raid56_parity(sctx, map, scrub_dev,
-						ppath, stripe_logical,
-						stripe_end);
-				if (ret)
-					goto out;
-				goto skip;
-			}
-		}
 		/*
 		 * canceled?
 		 */
@@ -3144,6 +3267,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			scrub_blocked_if_needed(fs_info);
 		}
 
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+			ret = get_raid56_logic_offset(physical, num, map,
+						      &logical,
+						      &stripe_logical);
+			logical += base;
+			if (ret) {
+				/* it is a parity stripe */
+				stripe_logical += base;
+				stripe_end = stripe_logical + increment;
+				ret = scrub_raid56_parity(sctx, map, scrub_dev,
+							  ppath, stripe_logical,
+							  stripe_end);
+				if (ret)
+					goto out;
+				goto skip;
+			}
+		}
+
 		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
 			key.type = BTRFS_METADATA_ITEM_KEY;
 		else
@@ -3188,6 +3329,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			}
 			btrfs_item_key_to_cpu(l, &key, slot);
 
+			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+			    key.type != BTRFS_METADATA_ITEM_KEY)
+				goto next;
+
 			if (key.type == BTRFS_METADATA_ITEM_KEY)
 				bytes = root->nodesize;
 			else
@@ -3196,10 +3341,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			if (key.objectid + bytes <= logical)
 				goto next;
 
-			if (key.type != BTRFS_EXTENT_ITEM_KEY &&
-			    key.type != BTRFS_METADATA_ITEM_KEY)
-				goto next;
-
 			if (key.objectid >= logical + map->stripe_len) {
 				/* out of this device extent */
 				if (key.objectid >= logic_end)
@@ -3212,8 +3353,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			flags = btrfs_extent_flags(l, extent);
 			generation = btrfs_extent_generation(l, extent);
 
-			if (key.objectid < logical &&
-			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+			if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+			    (key.objectid < logical ||
+			     key.objectid + bytes >
+			     logical + map->stripe_len)) {
 				btrfs_err(fs_info,
 					   "scrub: tree block %llu spanning "
 					   "stripes, ignored. logical=%llu",
@@ -3247,9 +3390,11 @@ again:
 						   &extent_dev,
 						   &extent_mirror_num);
 
-			ret = btrfs_lookup_csums_range(csum_root, logical,
-						logical + map->stripe_len - 1,
-						&sctx->csum_list, 1);
+			ret = btrfs_lookup_csums_range(csum_root,
+						       extent_logical,
+						       extent_logical +
+						       extent_len - 1,
+						       &sctx->csum_list, 1);
 			if (ret)
 				goto out;
 
@@ -3257,10 +3402,12 @@ again:
 					   extent_physical, extent_dev, flags,
 					   generation, extent_mirror_num,
 					   extent_logical - logical + physical);
+
+			scrub_free_csums(sctx);
+
 			if (ret)
 				goto out;
 
-			scrub_free_csums(sctx);
 			if (extent_logical + extent_len <
 			    key.objectid + bytes) {
 				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
@@ -3278,7 +3425,7 @@ loop:
 					if (ret && physical < physical_end) {
 						stripe_logical += base;
 						stripe_end = stripe_logical +
-								increment - 1;
+								increment;
 						ret = scrub_raid56_parity(sctx,
 							map, scrub_dev, ppath,
 							stripe_logical,
@@ -3333,7 +3480,6 @@ out:
 
 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 					  struct btrfs_device *scrub_dev,
-					  u64 chunk_tree, u64 chunk_objectid,
 					  u64 chunk_offset, u64 length,
 					  u64 dev_offset, int is_dev_replace)
 {
@@ -3384,10 +3530,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 	struct btrfs_root *root = sctx->dev_root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 length;
-	u64 chunk_tree;
-	u64 chunk_objectid;
 	u64 chunk_offset;
-	int ret;
+	int ret = 0;
 	int slot;
 	struct extent_buffer *l;
 	struct btrfs_key key;
@@ -3415,8 +3559,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 			if (path->slots[0] >=
 			    btrfs_header_nritems(path->nodes[0])) {
 				ret = btrfs_next_leaf(root, path);
-				if (ret)
+				if (ret < 0)
+					break;
+				if (ret > 0) {
+					ret = 0;
 					break;
+				}
+			} else {
+				ret = 0;
 			}
 		}
 
@@ -3443,8 +3593,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (found_key.offset + length <= start)
 			goto skip;
 
-		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
-		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
 		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
 
 		/*
@@ -3458,12 +3606,27 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache)
 			goto skip;
 
+		/*
+		 * we need to call btrfs_inc_block_group_ro() with scrubs_paused,
+		 * to avoid deadlock caused by:
+		 * btrfs_inc_block_group_ro()
+		 * -> btrfs_wait_for_commit()
+		 * -> btrfs_commit_transaction()
+		 * -> btrfs_scrub_pause()
+		 */
+		scrub_pause_on(fs_info);
+		ret = btrfs_inc_block_group_ro(root, cache);
+		scrub_pause_off(fs_info);
+		if (ret) {
+			btrfs_put_block_group(cache);
+			break;
+		}
+
 		dev_replace->cursor_right = found_key.offset + length;
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
-		ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
-				  chunk_offset, length, found_key.offset,
-				  is_dev_replace);
+		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
+				  found_key.offset, is_dev_replace);
 
 		/*
 		 * flush, submit all pending read and write bios, afterwards
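
The new comment is terse; spelled out, the cycle it prevents looks like this (both sides wait forever unless the scrub thread declares itself paused first):

/*
 *   scrub thread                         committing thread
 *   ------------                         -----------------
 *   btrfs_inc_block_group_ro()
 *     -> btrfs_wait_for_commit()         btrfs_commit_transaction()
 *        waits for the commit...           -> btrfs_scrub_pause()
 *                                             ...waits for scrub to pause
 *
 * scrub_pause_on() marks this scrub as paused up front, which lets the
 * commit finish and the wait inside btrfs_inc_block_group_ro() return.
 */
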
@@ -3483,8 +3646,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
 		wait_event(sctx->list_wait,
 			   atomic_read(&sctx->bios_in_flight) == 0);
-		atomic_inc(&fs_info->scrubs_paused);
-		wake_up(&fs_info->scrub_pause_wait);
+
+		scrub_pause_on(fs_info);
 
 		/*
 		 * must be called before we decrease @scrub_paused.
@@ -3495,11 +3658,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 			   atomic_read(&sctx->workers_pending) == 0);
 		atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
 
-		mutex_lock(&fs_info->scrub_lock);
-		__scrub_blocked_if_needed(fs_info);
-		atomic_dec(&fs_info->scrubs_paused);
-		mutex_unlock(&fs_info->scrub_lock);
-		wake_up(&fs_info->scrub_pause_wait);
+		scrub_pause_off(fs_info);
+
+		btrfs_dec_block_group_ro(root, cache);
 
 		btrfs_put_block_group(cache);
 		if (ret)
@@ -3523,11 +3684,7 @@ skip:
 
 	btrfs_free_path(path);
 
-	/*
-	 * ret can still be 1 from search_slot or next_leaf,
-	 * that's not an error
-	 */
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
 static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
@@ -3896,8 +4053,7 @@ static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
 		return 0;
 
 	WARN_ON(!dev->bdev);
-	wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
-					 bio_get_nr_vecs(dev->bdev));
+	wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
 	wr_ctx->tgtdev = dev;
 	atomic_set(&wr_ctx->flush_all_writes, 0);
 	return 0;
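
The bio_get_nr_vecs() clamps disappear here and in scrub_setup_ctx() above because the block layer of this era learned to split oversized bios itself, making a fixed compile-time page budget safe. For contrast, a hedged sketch of old versus new sizing (pages is illustrative):

	/* Old: clamp to what the target device said a bio could hold. */
	pages = min_t(int, SCRUB_PAGES_PER_WR_BIO,
		      bio_get_nr_vecs(dev->bdev));

	/* New: use the fixed budget; an over-large bio is split by the
	 * block layer instead of being rejected by the driver. */
	pages = SCRUB_PAGES_PER_WR_BIO;
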
