diff options
Diffstat (limited to 'fs/btrfs/bio.c')
-rw-r--r-- | fs/btrfs/bio.c | 159 |
1 files changed, 84 insertions, 75 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 1f216d07eff6..50b5fc1c06d7 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -27,12 +27,12 @@ struct btrfs_failed_bio { }; /* Is this a data path I/O that needs storage layer checksum and repair? */ -static inline bool is_data_bbio(struct btrfs_bio *bbio) +static inline bool is_data_bbio(const struct btrfs_bio *bbio) { return bbio->inode && is_data_inode(bbio->inode); } -static bool bbio_has_ordered_extent(struct btrfs_bio *bbio) +static bool bbio_has_ordered_extent(const struct btrfs_bio *bbio) { return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE; } @@ -81,6 +81,9 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS, &btrfs_clone_bioset); + if (IS_ERR(bio)) + return ERR_CAST(bio); + bbio = btrfs_bio(bio); btrfs_bio_init(bbio, fs_info, NULL, orig_bbio); bbio->inode = orig_bbio->inode; @@ -94,33 +97,17 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, return bbio; } -/* Free a bio that was never submitted to the underlying device. */ -static void btrfs_cleanup_bio(struct btrfs_bio *bbio) -{ - if (bbio_has_ordered_extent(bbio)) - btrfs_put_ordered_extent(bbio->ordered); - bio_put(&bbio->bio); -} - -static void __btrfs_bio_end_io(struct btrfs_bio *bbio) -{ - if (bbio_has_ordered_extent(bbio)) { - struct btrfs_ordered_extent *ordered = bbio->ordered; - - bbio->end_io(bbio); - btrfs_put_ordered_extent(ordered); - } else { - bbio->end_io(bbio); - } -} - void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) { bbio->bio.bi_status = status; if (bbio->bio.bi_pool == &btrfs_clone_bioset) { struct btrfs_bio *orig_bbio = bbio->private; - btrfs_cleanup_bio(bbio); + /* Free bio that was never submitted to the underlying device. */ + if (bbio_has_ordered_extent(bbio)) + btrfs_put_ordered_extent(bbio->ordered); + bio_put(&bbio->bio); + bbio = orig_bbio; } @@ -135,18 +122,26 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status) /* Load split bio's error which might be set above. */ if (status == BLK_STS_OK) bbio->bio.bi_status = READ_ONCE(bbio->status); - __btrfs_bio_end_io(bbio); + + if (bbio_has_ordered_extent(bbio)) { + struct btrfs_ordered_extent *ordered = bbio->ordered; + + bbio->end_io(bbio); + btrfs_put_ordered_extent(ordered); + } else { + bbio->end_io(bbio); + } } } -static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) +static int next_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror) { if (cur_mirror == fbio->num_copies) return cur_mirror + 1 - fbio->num_copies; return cur_mirror + 1; } -static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror) +static int prev_repair_mirror(const struct btrfs_failed_bio *fbio, int cur_mirror) { if (cur_mirror == 1) return fbio->num_copies; @@ -170,12 +165,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio); int mirror = repair_bbio->mirror_num; - /* - * We can only trigger this for data bio, which doesn't support larger - * folios yet. - */ - ASSERT(folio_order(page_folio(bv->bv_page)) == 0); - if (repair_bbio->bio.bi_status || !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) { bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ); @@ -197,7 +186,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio, btrfs_repair_io_failure(fs_info, btrfs_ino(inode), repair_bbio->file_offset, fs_info->sectorsize, repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT, - page_folio(bv->bv_page), bv->bv_offset, mirror); + bvec_phys(bv), mirror); } while (mirror != fbio->bbio->mirror_num); done: @@ -306,7 +295,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de btrfs_bio_end_io(bbio, bbio->bio.bi_status); } -static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) +static void btrfs_log_dev_io_error(const struct bio *bio, struct btrfs_device *dev) { if (!dev || !dev->bdev) return; @@ -321,8 +310,8 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); } -static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info, - struct bio *bio) +static struct workqueue_struct *btrfs_end_io_wq(const struct btrfs_fs_info *fs_info, + const struct bio *bio) { if (bio->bi_opf & REQ_META) return fs_info->endio_meta_workers; @@ -355,7 +344,7 @@ static void btrfs_simple_end_io(struct bio *bio) INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work); queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work); } else { - if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status) + if (bio_is_zone_append(bio) && !bio->bi_status) btrfs_record_physical_zoned(bbio); btrfs_bio_end_io(bbio, bbio->bio.bi_status); } @@ -398,7 +387,7 @@ static void btrfs_orig_write_end_io(struct bio *bio) else bio->bi_status = BLK_STS_OK; - if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status) + if (bio_is_zone_append(bio) && !bio->bi_status) stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; btrfs_bio_end_io(bbio, bbio->bio.bi_status); @@ -412,7 +401,7 @@ static void btrfs_clone_write_end_io(struct bio *bio) if (bio->bi_status) { atomic_inc(&stripe->bioc->error); btrfs_log_dev_io_error(bio, stripe->dev); - } else if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + } else if (bio_is_zone_append(bio)) { stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; } @@ -444,12 +433,20 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio) ASSERT(btrfs_dev_is_sequential(dev, physical)); bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT; } - btrfs_debug_in_rcu(dev->fs_info, + btrfs_debug(dev->fs_info, "%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u", __func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector, (unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev), dev->devid, bio->bi_iter.bi_size); + /* + * Track reads if tracking is enabled; ignore I/O operations before the + * filesystem is fully initialized. + */ + if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info) + percpu_counter_add(&dev->fs_info->stats_read_blocks, + bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits); + if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT) blkcg_punt_bio_submit(bio); else @@ -509,7 +506,7 @@ static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc, } } -static blk_status_t btrfs_bio_csum(struct btrfs_bio *bbio) +static int btrfs_bio_csum(struct btrfs_bio *bbio) { if (bbio->bio.bi_opf & REQ_META) return btree_csum_one_bio(bbio); @@ -540,11 +537,11 @@ static void run_one_async_start(struct btrfs_work *work) { struct async_submit_bio *async = container_of(work, struct async_submit_bio, work); - blk_status_t ret; + int ret; ret = btrfs_bio_csum(async->bbio); if (ret) - async->bbio->bio.bi_status = ret; + async->bbio->bio.bi_status = errno_to_blk_status(ret); } /* @@ -570,7 +567,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free) /* If an error occurred we just want to clean up the bio and move on. */ if (bio->bi_status) { - btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status); + btrfs_bio_end_io(async->bbio, bio->bi_status); return; } @@ -649,8 +646,14 @@ static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length) map_length = min(map_length, bbio->fs_info->max_zone_append_size); sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits, &nr_segs, map_length); - if (sector_offset) - return sector_offset << SECTOR_SHIFT; + if (sector_offset) { + /* + * bio_split_rw_at() could split at a size smaller than our + * sectorsize and thus cause unaligned I/Os. Fix that by + * always rounding down to the nearest boundary. + */ + return ALIGN_DOWN(sector_offset << SECTOR_SHIFT, bbio->fs_info->sectorsize); + } return map_length; } @@ -665,8 +668,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) bool use_append = btrfs_use_zone_append(bbio); struct btrfs_io_context *bioc = NULL; struct btrfs_io_stripe smap; - blk_status_t ret; - int error; + blk_status_t status; + int ret; if (!bbio->inode || btrfs_is_data_reloc_root(inode->root)) smap.rst_search_commit_root = true; @@ -674,11 +677,12 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) smap.rst_search_commit_root = false; btrfs_bio_counter_inc_blocked(fs_info); - error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, - &bioc, &smap, &mirror_num); - if (error) { - ret = errno_to_blk_status(error); - goto fail; + ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, + &bioc, &smap, &mirror_num); + if (ret) { + status = errno_to_blk_status(ret); + btrfs_bio_counter_dec(fs_info); + goto end_bbio; } map_length = min(map_length, length); @@ -686,7 +690,15 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) map_length = btrfs_append_map_length(bbio, map_length); if (map_length < length) { - bbio = btrfs_split_bio(fs_info, bbio, map_length); + struct btrfs_bio *split; + + split = btrfs_split_bio(fs_info, bbio, map_length); + if (IS_ERR(split)) { + status = errno_to_blk_status(PTR_ERR(split)); + btrfs_bio_counter_dec(fs_info); + goto end_bbio; + } + bbio = split; bio = &bbio->bio; } @@ -697,7 +709,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) { bbio->saved_iter = bio->bi_iter; ret = btrfs_lookup_bio_sums(bbio); - if (ret) + status = errno_to_blk_status(ret); + if (status) goto fail; } @@ -707,8 +720,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) bio->bi_opf |= REQ_OP_ZONE_APPEND; } - if (is_data_bbio(bbio) && bioc && - btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) { + if (is_data_bbio(bbio) && bioc && bioc->use_rst) { /* * No locking for the list update, as we only add to * the list in the I/O submission path, and list @@ -731,13 +743,15 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) goto done; ret = btrfs_bio_csum(bbio); - if (ret) + status = errno_to_blk_status(ret); + if (status) goto fail; } else if (use_append || (btrfs_is_zoned(fs_info) && inode && inode->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_alloc_dummy_sum(bbio); - if (ret) + status = errno_to_blk_status(ret); + if (status) goto fail; } } @@ -758,9 +772,10 @@ fail: ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset); ASSERT(remaining); - btrfs_bio_end_io(remaining, ret); + btrfs_bio_end_io(remaining, status); } - btrfs_bio_end_io(bbio, ret); +end_bbio: + btrfs_bio_end_io(bbio, status); /* Do not submit another chunk */ return true; } @@ -785,8 +800,7 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num) * freeing the bio. */ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, - u64 length, u64 logical, struct folio *folio, - unsigned int folio_offset, int mirror_num) + u64 length, u64 logical, phys_addr_t paddr, int mirror_num) { struct btrfs_io_stripe smap = { 0 }; struct bio_vec bvec; @@ -817,8 +831,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT; - ret = bio_add_folio(&bio, folio, length, folio_offset); - ASSERT(ret); + __bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr)); ret = submit_bio_wait(&bio); if (ret) { /* try to remap that extent elsewhere? */ @@ -826,7 +839,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, goto out_bio_uninit; } - btrfs_info_rl_in_rcu(fs_info, + btrfs_info_rl(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", ino, start, btrfs_dev_name(smap.dev), smap.physical >> SECTOR_SHIFT); @@ -882,22 +895,18 @@ int __init btrfs_bioset_init(void) return -ENOMEM; if (bioset_init(&btrfs_clone_bioset, BIO_POOL_SIZE, offsetof(struct btrfs_bio, bio), 0)) - goto out_free_bioset; + goto out; if (bioset_init(&btrfs_repair_bioset, BIO_POOL_SIZE, offsetof(struct btrfs_bio, bio), BIOSET_NEED_BVECS)) - goto out_free_clone_bioset; + goto out; if (mempool_init_kmalloc_pool(&btrfs_failed_bio_pool, BIO_POOL_SIZE, sizeof(struct btrfs_failed_bio))) - goto out_free_repair_bioset; + goto out; return 0; -out_free_repair_bioset: - bioset_exit(&btrfs_repair_bioset); -out_free_clone_bioset: - bioset_exit(&btrfs_clone_bioset); -out_free_bioset: - bioset_exit(&btrfs_bioset); +out: + btrfs_bioset_exit(); return -ENOMEM; } |