Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/bcache.h      |   7
-rw-r--r--  drivers/md/bcache/bset.c        |  12
-rw-r--r--  drivers/md/bcache/btree.c       |  21
-rw-r--r--  drivers/md/bcache/debug.c       |   2
-rw-r--r--  drivers/md/bcache/journal.c     |   4
-rw-r--r--  drivers/md/bcache/request.c     |  39
-rw-r--r--  drivers/md/bcache/super.c       |  26
-rw-r--r--  drivers/md/bcache/sysfs.c       |  29
-rw-r--r--  drivers/md/bcache/writeback.c   |  42
-rw-r--r--  drivers/md/bcache/writeback.h   |   4
-rw-r--r--  drivers/md/dm-bio-record.h      |   9
-rw-r--r--  drivers/md/dm-cache-metadata.c  |   2
-rw-r--r--  drivers/md/dm-clone-target.c    |  14
-rw-r--r--  drivers/md/dm-raid1.c           |  10
-rw-r--r--  drivers/md/dm-thin-metadata.c   |   2
-rw-r--r--  drivers/md/dm-zoned-metadata.c  |   6
-rw-r--r--  drivers/md/dm.c                 |  14
-rw-r--r--  drivers/md/md-linear.c          |   2
-rw-r--r--  drivers/md/md.c                 |  73
-rw-r--r--  drivers/md/md.h                 |   8
-rw-r--r--  drivers/md/raid1.c              |   8
-rw-r--r--  drivers/md/raid10.c             |  18
-rw-r--r--  drivers/md/raid5-ppl.c          |   2
-rw-r--r--  drivers/md/raid5.c              | 112
24 files changed, 270 insertions, 196 deletions
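Most of the mechanical churn in the diff below comes from the block layer dropping bio->bi_disk (and bio->bi_partno) in favor of reaching everything through bio->bi_bdev. As an illustrative sketch only (these helper names are hypothetical, not part of the patch), the replacement access pattern used throughout is:

#include <linux/blkdev.h>

/* After this series a bio carries only its block_device; the gendisk,
 * queue and partition number are all derived from bio->bi_bdev. */
static inline struct gendisk *example_bio_disk(struct bio *bio)
{
        return bio->bi_bdev->bd_disk;           /* was: bio->bi_disk */
}

static inline struct request_queue *example_bio_queue(struct bio *bio)
{
        return bio->bi_bdev->bd_disk->queue;    /* was: bio->bi_disk->queue */
}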
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 1d57f48307e6..848dd4db1659 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -373,6 +373,7 @@ struct cached_dev {
        unsigned int partial_stripes_expensive:1;
        unsigned int writeback_metadata:1;
        unsigned int writeback_running:1;
+       unsigned int writeback_consider_fragment:1;
        unsigned char writeback_percent;
        unsigned int writeback_delay;
 
@@ -385,6 +386,9 @@ struct cached_dev {
        unsigned int writeback_rate_update_seconds;
        unsigned int writeback_rate_i_term_inverse;
        unsigned int writeback_rate_p_term_inverse;
+       unsigned int writeback_rate_fp_term_low;
+       unsigned int writeback_rate_fp_term_mid;
+       unsigned int writeback_rate_fp_term_high;
        unsigned int writeback_rate_minimum;
 
        enum stop_on_failure stop_when_cache_set_failed;
@@ -1001,6 +1005,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
 
 extern struct workqueue_struct *bcache_wq;
 extern struct workqueue_struct *bch_journal_wq;
+extern struct workqueue_struct *bch_flush_wq;
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
 
@@ -1042,5 +1047,7 @@ void bch_debug_exit(void);
 void bch_debug_init(void);
 void bch_request_exit(void);
 int bch_request_init(void);
+void bch_btree_exit(void);
+int bch_btree_init(void);
 
 #endif /* _BCACHE_H */
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 67a2c47f4201..94d38e8a59b3 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -712,8 +712,10 @@ void bch_bset_build_written_tree(struct btree_keys *b)
        for (j = inorder_next(0, t->size);
             j;
             j = inorder_next(j, t->size)) {
-               while (bkey_to_cacheline(t, k) < cacheline)
-                       prev = k, k = bkey_next(k);
+               while (bkey_to_cacheline(t, k) < cacheline) {
+                       prev = k;
+                       k = bkey_next(k);
+               }
 
                t->prev[j] = bkey_u64s(prev);
                t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
@@ -901,8 +903,10 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
        status = BTREE_INSERT_STATUS_INSERT;
 
        while (m != bset_bkey_last(i) &&
-              bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
-               prev = m, m = bkey_next(m);
+              bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0) {
+               prev = m;
+               m = bkey_next(m);
+       }
 
        /* prev is in the tree, if we merge we're done */
        status = BTREE_INSERT_STATUS_BACK_MERGE;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 910df242c83d..fe6dce125aba 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -99,6 +99,8 @@
 #define PTR_HASH(c, k) \
        (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0))
 
+static struct workqueue_struct *btree_io_wq;
+
 #define insert_lock(s, b) ((b)->level <= (s)->lock)
 
@@ -308,7 +310,7 @@ static void __btree_node_write_done(struct closure *cl)
        btree_complete_write(b, w);
 
        if (btree_node_dirty(b))
-               schedule_delayed_work(&b->work, 30 * HZ);
+               queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
        closure_return_with_destructor(cl, btree_node_write_unlock);
 }
@@ -481,7 +483,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
        BUG_ON(!i->keys);
 
        if (!btree_node_dirty(b))
-               schedule_delayed_work(&b->work, 30 * HZ);
+               queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
        set_btree_node_dirty(b);
 
@@ -2764,3 +2766,18 @@ void bch_keybuf_init(struct keybuf *buf)
        spin_lock_init(&buf->lock);
        array_allocator_init(&buf->freelist);
 }
+
+void bch_btree_exit(void)
+{
+       if (btree_io_wq)
+               destroy_workqueue(btree_io_wq);
+}
+
+int __init bch_btree_init(void)
+{
+       btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0);
+       if (!btree_io_wq)
+               return -ENOMEM;
+
+       return 0;
+}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index b00fd08d696b..63e809f38e3f 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -114,7 +114,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
        check = bio_kmalloc(GFP_NOIO, bio_segments(bio));
        if (!check)
                return;
-       check->bi_disk = bio->bi_disk;
+       bio_set_dev(check, bio->bi_bdev);
        check->bi_opf = REQ_OP_READ;
        check->bi_iter.bi_sector = bio->bi_iter.bi_sector;
        check->bi_iter.bi_size = bio->bi_iter.bi_size;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index aefbdb7e003b..c6613e817333 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -932,8 +932,8 @@ atomic_t *bch_journal(struct cache_set *c,
                journal_try_write(c);
        } else if (!w->dirty) {
                w->dirty = true;
-               schedule_delayed_work(&c->journal.work,
-                                     msecs_to_jiffies(c->journal_delay_ms));
+               queue_delayed_work(bch_flush_wq, &c->journal.work,
+                                  msecs_to_jiffies(c->journal_delay_ms));
                spin_unlock(&c->journal.lock);
        } else {
                spin_unlock(&c->journal.lock);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 85b1f2a9b72d..29c231758293 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -475,7 +475,7 @@ struct search {
        unsigned int read_dirty_data:1;
        unsigned int cache_missed:1;
 
-       struct block_device *part;
+       struct block_device *orig_bdev;
        unsigned long start_time;
 
        struct btree_op op;
@@ -670,8 +670,8 @@ static void bio_complete(struct search *s)
 {
        if (s->orig_bio) {
                /* Count on bcache device */
-               part_end_io_acct(s->part, s->orig_bio, s->start_time);
-
+               bio_end_io_acct_remapped(s->orig_bio, s->start_time,
+                                        s->orig_bdev);
                trace_bcache_request_end(s->d, s->orig_bio);
                s->orig_bio->bi_status = s->iop.status;
                bio_endio(s->orig_bio);
@@ -714,7 +714,8 @@ static void search_free(struct closure *cl)
 }
 
 static inline struct search *search_alloc(struct bio *bio,
-                                         struct bcache_device *d)
+               struct bcache_device *d, struct block_device *orig_bdev,
+               unsigned long start_time)
 {
        struct search *s;
 
@@ -732,7 +733,8 @@ static inline struct search *search_alloc(struct bio *bio,
        s->write = op_is_write(bio_op(bio));
        s->read_dirty_data = 0;
        /* Count on the bcache device */
-       s->start_time = part_start_io_acct(d->disk, &s->part, bio);
+       s->orig_bdev = orig_bdev;
+       s->start_time = start_time;
        s->iop.c = d->c;
        s->iop.bio = NULL;
        s->iop.inode = d->id;
@@ -894,7 +896,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
            !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
            s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
                reada = min_t(sector_t, dc->readahead >> 9,
-                             get_capacity(bio->bi_disk) - bio_end_sector(bio));
+                             get_capacity(bio->bi_bdev->bd_disk) -
+                             bio_end_sector(bio));
 
        s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
 
@@ -1073,7 +1076,7 @@ struct detached_dev_io_private {
        unsigned long start_time;
        bio_end_io_t *bi_end_io;
        void *bi_private;
-       struct block_device *part;
+       struct block_device *orig_bdev;
 };
 
 static void detached_dev_end_io(struct bio *bio)
@@ -1085,7 +1088,7 @@ static void detached_dev_end_io(struct bio *bio)
        bio->bi_private = ddip->bi_private;
 
        /* Count on the bcache device */
-       part_end_io_acct(ddip->part, bio, ddip->start_time);
+       bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);
 
        if (bio->bi_status) {
                struct cached_dev *dc = container_of(ddip->d,
@@ -1098,7 +1101,8 @@ static void detached_dev_end_io(struct bio *bio)
        bio->bi_end_io(bio);
 }
 
-static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
+               struct block_device *orig_bdev, unsigned long start_time)
 {
        struct detached_dev_io_private *ddip;
        struct cached_dev *dc = container_of(d, struct cached_dev, disk);
@@ -1111,7 +1115,8 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
        ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
        ddip->d = d;
        /* Count on the bcache device */
-       ddip->start_time = part_start_io_acct(d->disk, &ddip->part, bio);
+       ddip->orig_bdev = orig_bdev;
+       ddip->start_time = start_time;
        ddip->bi_end_io = bio->bi_end_io;
        ddip->bi_private = bio->bi_private;
        bio->bi_end_io = detached_dev_end_io;
@@ -1167,8 +1172,10 @@ static void quit_max_writeback_rate(struct cache_set *c,
 blk_qc_t cached_dev_submit_bio(struct bio *bio)
 {
        struct search *s;
-       struct bcache_device *d = bio->bi_disk->private_data;
+       struct block_device *orig_bdev = bio->bi_bdev;
+       struct bcache_device *d = orig_bdev->bd_disk->private_data;
        struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+       unsigned long start_time;
        int rw = bio_data_dir(bio);
 
        if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
@@ -1193,11 +1200,13 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
                }
        }
 
+       start_time = bio_start_io_acct(bio);
+
        bio_set_dev(bio, dc->bdev);
        bio->bi_iter.bi_sector += dc->sb.data_offset;
 
        if (cached_dev_get(dc)) {
-               s = search_alloc(bio, d);
+               s = search_alloc(bio, d, orig_bdev, start_time);
                trace_bcache_request_start(s->d, bio);
 
                if (!bio->bi_iter.bi_size) {
@@ -1218,7 +1227,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
                }
        } else
                /* I/O request sent to backing device */
-               detached_dev_do_request(d, bio);
+               detached_dev_do_request(d, bio, orig_bdev, start_time);
 
        return BLK_QC_T_NONE;
 }
@@ -1274,7 +1283,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
 {
        struct search *s;
        struct closure *cl;
-       struct bcache_device *d = bio->bi_disk->private_data;
+       struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
 
        if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
                bio->bi_status = BLK_STS_IOERR;
@@ -1282,7 +1291,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
                return BLK_QC_T_NONE;
        }
 
-       s = search_alloc(bio, d);
+       s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio));
        cl = &s->cl;
        bio = &s->bio.bio;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 2047a9cccdb5..71691f32959b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -49,6 +49,7 @@ static int bcache_major;
 static DEFINE_IDA(bcache_device_idx);
 static wait_queue_head_t unregister_wait;
 struct workqueue_struct *bcache_wq;
+struct workqueue_struct *bch_flush_wq;
 struct workqueue_struct *bch_journal_wq;
 
@@ -1939,7 +1940,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                goto err;
 
        if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
-                       BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
+                       BIOSET_NEED_RESCUER))
                goto err;
 
        c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
@@ -2517,7 +2518,7 @@ out:
        module_put(THIS_MODULE);
 }
 
-static void register_device_aync(struct async_reg_args *args)
+static void register_device_async(struct async_reg_args *args)
 {
        if (SB_IS_BDEV(args->sb))
                INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
@@ -2611,7 +2612,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                args->sb = sb;
                args->sb_disk = sb_disk;
                args->bdev = bdev;
-               register_device_aync(args);
+               register_device_async(args);
                /* No wait and returns to user space */
                goto async_done;
        }
@@ -2821,6 +2822,9 @@ static void bcache_exit(void)
                destroy_workqueue(bcache_wq);
        if (bch_journal_wq)
                destroy_workqueue(bch_journal_wq);
+       if (bch_flush_wq)
+               destroy_workqueue(bch_flush_wq);
+       bch_btree_exit();
 
        if (bcache_major)
                unregister_blkdev(bcache_major, "bcache");
@@ -2876,10 +2880,26 @@ static int __init bcache_init(void)
                return bcache_major;
        }
 
+       if (bch_btree_init())
+               goto err;
+
       bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
        if (!bcache_wq)
                goto err;
 
+       /*
+        * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
+        *
+        * 1. It used `system_wq` before which also does no memory reclaim.
+        * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
+        *    reduced throughput can be observed.
+        *
+        * We still want to use our own queue to not congest the `system_wq`.
+        */
+       bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
+       if (!bch_flush_wq)
+               goto err;
+
        bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
        if (!bch_journal_wq)
                goto err;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 00a520c03f41..cc89f3156d1a 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -117,10 +117,14 @@ rw_attribute(writeback_running);
 rw_attribute(writeback_percent);
 rw_attribute(writeback_delay);
 rw_attribute(writeback_rate);
+rw_attribute(writeback_consider_fragment);
 
 rw_attribute(writeback_rate_update_seconds);
 rw_attribute(writeback_rate_i_term_inverse);
 rw_attribute(writeback_rate_p_term_inverse);
+rw_attribute(writeback_rate_fp_term_low);
+rw_attribute(writeback_rate_fp_term_mid);
+rw_attribute(writeback_rate_fp_term_high);
 rw_attribute(writeback_rate_minimum);
 read_attribute(writeback_rate_debug);
 
@@ -195,6 +199,7 @@ SHOW(__bch_cached_dev)
        var_printf(bypass_torture_test, "%i");
        var_printf(writeback_metadata, "%i");
        var_printf(writeback_running, "%i");
+       var_printf(writeback_consider_fragment, "%i");
        var_print(writeback_delay);
        var_print(writeback_percent);
        sysfs_hprint(writeback_rate,
@@ -205,6 +210,9 @@ SHOW(__bch_cached_dev)
        var_print(writeback_rate_update_seconds);
        var_print(writeback_rate_i_term_inverse);
        var_print(writeback_rate_p_term_inverse);
+       var_print(writeback_rate_fp_term_low);
+       var_print(writeback_rate_fp_term_mid);
+       var_print(writeback_rate_fp_term_high);
        var_print(writeback_rate_minimum);
 
        if (attr == &sysfs_writeback_rate_debug) {
@@ -303,6 +311,7 @@ STORE(__cached_dev)
        sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
        sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
        sysfs_strtoul_bool(writeback_running, dc->writeback_running);
+       sysfs_strtoul_bool(writeback_consider_fragment, dc->writeback_consider_fragment);
        sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);
 
        sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
@@ -331,6 +340,16 @@ STORE(__cached_dev)
        sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
                            dc->writeback_rate_p_term_inverse,
                            1, UINT_MAX);
+       sysfs_strtoul_clamp(writeback_rate_fp_term_low,
+                           dc->writeback_rate_fp_term_low,
+                           1, dc->writeback_rate_fp_term_mid - 1);
+       sysfs_strtoul_clamp(writeback_rate_fp_term_mid,
+                           dc->writeback_rate_fp_term_mid,
+                           dc->writeback_rate_fp_term_low + 1,
+                           dc->writeback_rate_fp_term_high - 1);
+       sysfs_strtoul_clamp(writeback_rate_fp_term_high,
+                           dc->writeback_rate_fp_term_high,
+                           dc->writeback_rate_fp_term_mid + 1, UINT_MAX);
        sysfs_strtoul_clamp(writeback_rate_minimum,
                            dc->writeback_rate_minimum,
                            1, UINT_MAX);
@@ -499,9 +518,13 @@ static struct attribute *bch_cached_dev_files[] = {
        &sysfs_writeback_delay,
        &sysfs_writeback_percent,
        &sysfs_writeback_rate,
+       &sysfs_writeback_consider_fragment,
        &sysfs_writeback_rate_update_seconds,
        &sysfs_writeback_rate_i_term_inverse,
        &sysfs_writeback_rate_p_term_inverse,
+       &sysfs_writeback_rate_fp_term_low,
+       &sysfs_writeback_rate_fp_term_mid,
+       &sysfs_writeback_rate_fp_term_high,
        &sysfs_writeback_rate_minimum,
        &sysfs_writeback_rate_debug,
        &sysfs_io_errors,
@@ -1071,8 +1094,10 @@ SHOW(__bch_cache)
                        --n;
 
                while (cached < p + n &&
-                      *cached == BTREE_PRIO)
-                       cached++, n--;
+                      *cached == BTREE_PRIO) {
+                       cached++;
+                       n--;
+               }
 
                for (i = 0; i < n; i++)
                        sum += INITIAL_PRIO - cached[i];
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index a129e4d2707c..82d4e0880a99 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -88,6 +88,44 @@ static void __update_writeback_rate(struct cached_dev *dc)
        int64_t integral_scaled;
        uint32_t new_rate;
 
+       /*
+        * We need to consider the number of dirty buckets as well
+        * when calculating the proportional_scaled. Otherwise we might
+        * end up with an unreasonably small writeback rate in a highly
+        * fragmented situation where very few dirty sectors consume a
+        * lot of dirty buckets. The worst case is when the dirty buckets
+        * reach cutoff_writeback_sync while the dirty data has still not
+        * reached the writeback percent; the rate then stays at the
+        * minimum value, leaving writes stuck in non-writeback mode.
+        */
+       struct cache_set *c = dc->disk.c;
+
+       int64_t dirty_buckets = c->nbuckets - c->avail_nbuckets;
+
+       if (dc->writeback_consider_fragment &&
+           c->gc_stats.in_use > BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW && dirty > 0) {
+               int64_t fragment =
+                       div_s64((dirty_buckets * c->cache->sb.bucket_size), dirty);
+               int64_t fp_term;
+               int64_t fps;
+
+               if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
+                       fp_term = dc->writeback_rate_fp_term_low *
+                       (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
+               } else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
+                       fp_term = dc->writeback_rate_fp_term_mid *
+                       (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
+               } else {
+                       fp_term = dc->writeback_rate_fp_term_high *
+                       (c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
+               }
+               fps = div_s64(dirty, dirty_buckets) * fp_term;
+               if (fragment > 3 && fps > proportional_scaled) {
+                       /* Only overwrite the p term when fragment > 3 */
+                       proportional_scaled = fps;
+               }
+       }
+
        if ((error < 0 && dc->writeback_rate_integral > 0) ||
            (error > 0 && time_before64(local_clock(),
                         dc->writeback_rate.next + NSEC_PER_MSEC))) {
@@ -977,6 +1015,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 
        dc->writeback_metadata = true;
        dc->writeback_running = false;
+       dc->writeback_consider_fragment = true;
        dc->writeback_percent = 10;
        dc->writeback_delay = 30;
        atomic_long_set(&dc->writeback_rate.rate, 1024);
@@ -984,6 +1023,9 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 
        dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
        dc->writeback_rate_p_term_inverse = 40;
+       dc->writeback_rate_fp_term_low = 1;
+       dc->writeback_rate_fp_term_mid = 10;
+       dc->writeback_rate_fp_term_high = 1000;
        dc->writeback_rate_i_term_inverse = 10000;
 
        WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3f1230e22de0..02b2f9df73f6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -16,6 +16,10 @@
 
 #define BCH_AUTO_GC_DIRTY_THRESHOLD 50
 
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW 50
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID 57
+#define BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH 64
+
 #define BCH_DIRTY_INIT_THRD_MAX 64
 /*
  * 14 (16384ths) is chosen here as something that each backing device
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index 2ea0360108e1..a3b71350eec8 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -18,8 +18,7 @@
  */
 
 struct dm_bio_details {
-       struct gendisk *bi_disk;
-       u8 bi_partno;
+       struct block_device *bi_bdev;
        int __bi_remaining;
        unsigned long bi_flags;
        struct bvec_iter bi_iter;
@@ -31,8 +30,7 @@ struct dm_bio_details {
 
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 {
-       bd->bi_disk = bio->bi_disk;
-       bd->bi_partno = bio->bi_partno;
+       bd->bi_bdev = bio->bi_bdev;
        bd->bi_flags = bio->bi_flags;
        bd->bi_iter = bio->bi_iter;
        bd->__bi_remaining = atomic_read(&bio->__bi_remaining);
@@ -44,8 +42,7 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
 {
-       bio->bi_disk = bd->bi_disk;
-       bio->bi_partno = bd->bi_partno;
+       bio->bi_bdev = bd->bi_bdev;
        bio->bi_flags = bd->bi_flags;
        bio->bi_iter = bd->bi_iter;
        atomic_set(&bio->__bi_remaining, bd->__bi_remaining);
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index af6d4f898e4c..89a73204dbf4 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -449,7 +449,7 @@ static int __check_incompat_features(struct cache_disk_superblock *disk_super,
        /*
         * Check for read-only metadata to skip the following RDWR checks.
         */
-       if (get_disk_ro(cmd->bdev->bd_disk))
+       if (bdev_read_only(cmd->bdev))
                return 0;
 
        features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index bdb255edc200..a90bdf9b2ca6 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -85,12 +85,6 @@ struct clone {
 
        struct dm_clone_metadata *cmd;
 
-       /*
-        * bio used to flush the destination device, before committing the
-        * metadata.
-        */
-       struct bio flush_bio;
-
        /* Region hydration hash table */
        struct hash_table_bucket *ht;
 
@@ -1155,11 +1149,7 @@ static int commit_metadata(struct clone *clone, bool *dest_dev_flushed)
                goto out;
        }
 
-       bio_reset(&clone->flush_bio);
-       bio_set_dev(&clone->flush_bio, clone->dest_dev->bdev);
-       clone->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-       r = submit_bio_wait(&clone->flush_bio);
+       r = blkdev_issue_flush(clone->dest_dev->bdev);
        if (unlikely(r)) {
                __metadata_operation_failed(clone, "flush destination device", r);
                goto out;
@@ -1886,7 +1876,6 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        bio_list_init(&clone->deferred_flush_completions);
        clone->hydration_offset = 0;
        atomic_set(&clone->hydrations_in_flight, 0);
-       bio_init(&clone->flush_bio, NULL, 0);
 
        clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
        if (!clone->wq) {
@@ -1958,7 +1947,6 @@ static void clone_dtr(struct dm_target *ti)
        struct clone *clone = ti->private;
 
        mutex_destroy(&clone->commit_lock);
-       bio_uninit(&clone->flush_bio);
 
        for (i = 0; i < clone->nr_ctr_args; i++)
                kfree(clone->ctr_args[i]);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index fa09bc4e4c54..b0a82f29a2e4 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,7 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
        struct mirror *m;
-       /* if details->bi_disk == NULL, details were not saved */
+       /* if details->bi_bdev == NULL, details were not saved */
        struct dm_bio_details details;
        region_t write_region;
 };
@@ -1190,7 +1190,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
        struct dm_raid1_bio_record *bio_record =
          dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
-       bio_record->details.bi_disk = NULL;
+       bio_record->details.bi_bdev = NULL;
 
        if (rw == WRITE) {
                /* Save region for mirror_end_io() handler */
@@ -1257,7 +1257,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
                goto out;
 
        if (unlikely(*error)) {
-               if (!bio_record->details.bi_disk) {
+               if (!bio_record->details.bi_bdev) {
                        /*
                         * There wasn't enough memory to record necessary
                         * information for a retry or there was no other
@@ -1282,7 +1282,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
                        bd = &bio_record->details;
 
                        dm_bio_restore(bd, bio);
-                       bio_record->details.bi_disk = NULL;
+                       bio_record->details.bi_bdev = NULL;
                        bio->bi_status = 0;
 
                        queue_bio(ms, bio, rw);
@@ -1292,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
        }
 
 out:
-       bio_record->details.bi_disk = NULL;
+       bio_record->details.bi_bdev = NULL;
 
        return DM_ENDIO_DONE;
 }
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 6ebb2127f3e2..e75b20480e46 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -636,7 +636,7 @@ static int __check_incompat_features(struct thin_disk_superblock *disk_super,
        /*
         * Check for read-only metadata to skip the following RDWR checks.
         */
-       if (get_disk_ro(pmd->bdev->bd_disk))
+       if (bdev_read_only(pmd->bdev))
                return 0;
 
        features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index b298fefb022e..039d17b28938 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -819,7 +819,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
 
        ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block, mblk->page);
        if (ret == 0)
-               ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+               ret = blkdev_issue_flush(dev->bdev);
 
        return ret;
 }
@@ -862,7 +862,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
 
        /* Flush drive cache (this will also sync data) */
        if (ret == 0)
-               ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+               ret = blkdev_issue_flush(dev->bdev);
 
        return ret;
 }
@@ -933,7 +933,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
 
        /* If there are no dirty metadata blocks, just flush the device cache */
        if (list_empty(&write_list)) {
-               ret = blkdev_issue_flush(dev->bdev, GFP_NOIO);
+               ret = blkdev_issue_flush(dev->bdev);
                goto err;
        }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7bac564f3faa..479ec5bea09e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -977,16 +977,17 @@ static void clone_endio(struct bio *bio)
        struct mapped_device *md = tio->io->md;
        dm_endio_fn endio = tio->ti->type->end_io;
        struct bio *orig_bio = io->orig_bio;
+       struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 
        if (unlikely(error == BLK_STS_TARGET)) {
                if (bio_op(bio) == REQ_OP_DISCARD &&
-                   !bio->bi_disk->queue->limits.max_discard_sectors)
+                   !q->limits.max_discard_sectors)
                        disable_discard(md);
                else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
-                        !bio->bi_disk->queue->limits.max_write_same_sectors)
+                        !q->limits.max_write_same_sectors)
                        disable_write_same(md);
                else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
-                        !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
+                        !q->limits.max_write_zeroes_sectors)
                        disable_write_zeroes(md);
        }
 
@@ -996,7 +997,7 @@ static void clone_endio(struct bio *bio)
         */
        if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
                sector_t written_sector = bio->bi_iter.bi_sector;
-               struct request_queue *q = orig_bio->bi_disk->queue;
+               struct request_queue *q = orig_bio->bi_bdev->bd_disk->queue;
                u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
 
                orig_bio->bi_iter.bi_sector += written_sector & mask;
@@ -1422,8 +1423,7 @@ static int __send_empty_flush(struct clone_info *ci)
         */
        bio_init(&flush_bio, NULL, 0);
        flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
-       flush_bio.bi_disk = ci->io->md->disk;
-       bio_associate_blkg(&flush_bio);
+       bio_set_dev(&flush_bio, ci->io->md->disk->part0);
 
        ci->bio = &flush_bio;
        ci->sector_count = 0;
@@ -1626,7 +1626,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 
 static blk_qc_t dm_submit_bio(struct bio *bio)
 {
-       struct mapped_device *md = bio->bi_disk->private_data;
+       struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
        blk_qc_t ret = BLK_QC_T_NONE;
        int srcu_idx;
        struct dm_table *map;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 68cac7d19278..63ed8329a98d 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -252,7 +252,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
                start_sector + data_offset;
 
        if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                    !blk_queue_discard(bio->bi_disk->queue))) {
+                    !blk_queue_discard(bio->bi_bdev->bd_disk->queue))) {
                /* Just ignore it */
                bio_endio(bio);
        } else {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 04384452a7ab..21da0c48f6c2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -340,24 +340,6 @@ static int start_readonly;
  */
 static bool create_on_open = true;
 
-struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
-                           struct mddev *mddev)
-{
-       if (!mddev || !bioset_initialized(&mddev->bio_set))
-               return bio_alloc(gfp_mask, nr_iovecs);
-
-       return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
-}
-EXPORT_SYMBOL_GPL(bio_alloc_mddev);
-
-static struct bio *md_bio_alloc_sync(struct mddev *mddev)
-{
-       if (!mddev || !bioset_initialized(&mddev->sync_set))
-               return bio_alloc(GFP_NOIO, 1);
-
-       return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
-}
-
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -463,8 +445,8 @@ struct md_io {
        struct mddev *mddev;
        bio_end_io_t *orig_bi_end_io;
        void *orig_bi_private;
+       struct block_device *orig_bi_bdev;
        unsigned long start_time;
-       struct block_device *part;
 };
 
 static void md_end_io(struct bio *bio)
@@ -472,7 +454,7 @@ static void md_end_io(struct bio *bio)
        struct md_io *md_io = bio->bi_private;
        struct mddev *mddev = md_io->mddev;
 
-       part_end_io_acct(md_io->part, bio, md_io->start_time);
+       bio_end_io_acct_remapped(bio, md_io->start_time, md_io->orig_bi_bdev);
 
        bio->bi_end_io = md_io->orig_bi_end_io;
        bio->bi_private = md_io->orig_bi_private;
@@ -486,7 +468,7 @@ static void md_end_io(struct bio *bio)
 static blk_qc_t md_submit_bio(struct bio *bio)
 {
        const int rw = bio_data_dir(bio);
-       struct mddev *mddev = bio->bi_disk->private_data;
+       struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
 
        if (mddev == NULL || mddev->pers == NULL) {
                bio_io_error(bio);
@@ -514,12 +496,12 @@ static blk_qc_t md_submit_bio(struct bio *bio)
                md_io->mddev = mddev;
                md_io->orig_bi_end_io = bio->bi_end_io;
                md_io->orig_bi_private = bio->bi_private;
+               md_io->orig_bi_bdev = bio->bi_bdev;
 
                bio->bi_end_io = md_end_io;
                bio->bi_private = md_io;
 
-               md_io->start_time = part_start_io_acct(mddev->gendisk,
-                                                      &md_io->part, bio);
+               md_io->start_time = bio_start_io_acct(bio);
        }
 
        /* bio could be mergeable after passing to underlayer */
@@ -613,7 +595,7 @@ static void submit_flushes(struct work_struct *ws)
                        atomic_inc(&rdev->nr_pending);
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
-                       bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
+                       bi = bio_alloc_bioset(GFP_NOIO, 0, &mddev->bio_set);
                        bi->bi_end_io = md_end_flush;
                        bi->bi_private = rdev;
                        bio_set_dev(bi, rdev->bdev);
@@ -999,7 +981,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
        if (test_bit(Faulty, &rdev->flags))
                return;
 
-       bio = md_bio_alloc_sync(mddev);
+       bio = bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
 
        atomic_inc(&rdev->nr_pending);
 
@@ -1031,29 +1013,29 @@ int md_super_wait(struct mddev *mddev)
 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
                 struct page *page, int op, int op_flags, bool metadata_op)
 {
-       struct bio *bio = md_bio_alloc_sync(rdev->mddev);
-       int ret;
+       struct bio bio;
+       struct bio_vec bvec;
+
+       bio_init(&bio, &bvec, 1);
 
        if (metadata_op && rdev->meta_bdev)
-               bio_set_dev(bio, rdev->meta_bdev);
+               bio_set_dev(&bio, rdev->meta_bdev);
        else
-               bio_set_dev(bio, rdev->bdev);
-       bio_set_op_attrs(bio, op, op_flags);
+               bio_set_dev(&bio, rdev->bdev);
+       bio.bi_opf = op | op_flags;
        if (metadata_op)
-               bio->bi_iter.bi_sector = sector + rdev->sb_start;
+               bio.bi_iter.bi_sector = sector + rdev->sb_start;
        else if (rdev->mddev->reshape_position != MaxSector &&
                 (rdev->mddev->reshape_backwards ==
                  (sector >= rdev->mddev->reshape_position)))
-               bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
+               bio.bi_iter.bi_sector = sector + rdev->new_data_offset;
        else
-               bio->bi_iter.bi_sector = sector + rdev->data_offset;
-       bio_add_page(bio, page, size, 0);
+               bio.bi_iter.bi_sector = sector + rdev->data_offset;
+       bio_add_page(&bio, page, size, 0);
 
-       submit_bio_wait(bio);
+       submit_bio_wait(&bio);
 
-       ret = !bio->bi_status;
-       bio_put(bio);
-       return ret;
+       return !bio.bi_status;
 }
 EXPORT_SYMBOL_GPL(sync_page_io);
 
@@ -2417,6 +2399,12 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_integrity_add_rdev);
 
+static bool rdev_read_only(struct md_rdev *rdev)
+{
+       return bdev_read_only(rdev->bdev) ||
+               (rdev->meta_bdev && bdev_read_only(rdev->meta_bdev));
+}
+
 static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 {
        char b[BDEVNAME_SIZE];
@@ -2426,8 +2414,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
        if (find_rdev(mddev, rdev->bdev->bd_dev))
                return -EEXIST;
 
-       if ((bdev_read_only(rdev->bdev) || bdev_read_only(rdev->meta_bdev)) &&
-           mddev->pers)
+       if (rdev_read_only(rdev) && mddev->pers)
                return -EROFS;
 
        /* make sure rdev->sectors exceeds mddev->dev_sectors */
@@ -5861,9 +5848,7 @@ int md_run(struct mddev *mddev)
                        continue;
                sync_blockdev(rdev->bdev);
                invalidate_bdev(rdev->bdev);
-               if (mddev->ro != 1 &&
-                   (bdev_read_only(rdev->bdev) ||
-                    bdev_read_only(rdev->meta_bdev))) {
+               if (mddev->ro != 1 && rdev_read_only(rdev)) {
                        mddev->ro = 1;
                        if (mddev->gendisk)
                                set_disk_ro(mddev->gendisk, 1);
@@ -6158,7 +6143,7 @@ static int restart_array(struct mddev *mddev)
                if (test_bit(Journal, &rdev->flags) &&
                    !test_bit(Faulty, &rdev->flags))
                        has_journal = true;
-               if (bdev_read_only(rdev->bdev))
+               if (rdev_read_only(rdev))
                        has_readonly = true;
        }
        rcu_read_unlock();
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 34070ab30a8a..bcbba1b5ec4a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -556,7 +556,7 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect
 
 static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
 {
-       atomic_add(nr_sectors, &bio->bi_disk->sync_io);
+       md_sync_acct(bio->bi_bdev, nr_sectors);
 }
 
 struct md_personality
@@ -742,8 +742,6 @@ extern void md_rdev_clear(struct md_rdev *rdev);
 extern void md_handle_request(struct mddev *mddev, struct bio *bio);
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
-extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
-                                  struct mddev *mddev);
 
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
@@ -793,14 +791,14 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
 static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio)
 {
        if (bio_op(bio) == REQ_OP_WRITE_SAME &&
-           !bio->bi_disk->queue->limits.max_write_same_sectors)
+           !bio->bi_bdev->bd_disk->queue->limits.max_write_same_sectors)
                mddev->queue->limits.max_write_same_sectors = 0;
 }
 
 static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio)
 {
        if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
-           !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
+           !bio->bi_bdev->bd_disk->queue->limits.max_write_zeroes_sectors)
                mddev->queue->limits.max_write_zeroes_sectors = 0;
 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c0347997f6ff..d2378765dc15 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -794,13 +794,13 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 
        while (bio) { /* submit pending writes */
                struct bio *next = bio->bi_next;
-               struct md_rdev *rdev = (void *)bio->bi_disk;
+               struct md_rdev *rdev = (void *)bio->bi_bdev;
                bio->bi_next = NULL;
                bio_set_dev(bio, rdev->bdev);
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_disk->queue)))
+                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1104,7 +1104,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
        int i = 0;
        struct bio *behind_bio = NULL;
 
-       behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
+       behind_bio = bio_alloc_bioset(GFP_NOIO, vcnt, &r1_bio->mddev->bio_set);
        if (!behind_bio)
                return;
 
@@ -1520,7 +1520,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                        trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
                                              r1_bio->sector);
                /* flush_pending_writes() needs access to the rdev so...*/
-               mbio->bi_disk = (void *)conf->mirrors[i].rdev;
+               mbio->bi_bdev = (void *)conf->mirrors[i].rdev;
 
                cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
                if (cb)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c5d88ef6a45c..a9ae7d113492 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -882,13 +882,13 @@ static void flush_pending_writes(struct r10conf *conf)
 
                while (bio) { /* submit pending writes */
                        struct bio *next = bio->bi_next;
-                       struct md_rdev *rdev = (void*)bio->bi_disk;
+                       struct md_rdev *rdev = (void*)bio->bi_bdev;
                        bio->bi_next = NULL;
                        bio_set_dev(bio, rdev->bdev);
                        if (test_bit(Faulty, &rdev->flags)) {
                                bio_io_error(bio);
                        } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                                           !blk_queue_discard(bio->bi_disk->queue)))
+                                           !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
                                /* Just ignore it */
                                bio_endio(bio);
                        else
@@ -1075,13 +1075,13 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 
        while (bio) { /* submit pending writes */
                struct bio *next = bio->bi_next;
-               struct md_rdev *rdev = (void*)bio->bi_disk;
+               struct md_rdev *rdev = (void*)bio->bi_bdev;
                bio->bi_next = NULL;
                bio_set_dev(bio, rdev->bdev);
                if (test_bit(Faulty, &rdev->flags)) {
                        bio_io_error(bio);
                } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-                                   !blk_queue_discard(bio->bi_disk->queue)))
+                                   !blk_queue_discard(bio->bi_bdev->bd_disk->queue)))
                        /* Just ignore it */
                        bio_endio(bio);
                else
@@ -1253,7 +1253,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
                trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
                                      r10_bio->sector);
        /* flush_pending_writes() needs access to the rdev so...*/
-       mbio->bi_disk = (void *)rdev;
+       mbio->bi_bdev = (void *)rdev;
 
        atomic_inc(&r10_bio->remaining);
 
@@ -3003,7 +3003,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 
        /* Again, very different code for resync and recovery.
         * Both must result in an r10bio with a list of bios that
-        * have bi_end_io, bi_sector, bi_disk set,
+        * have bi_end_io, bi_sector, bi_bdev set,
         * and bi_private set to the r10bio.
         * For recovery, we may actually create several r10bios
         * with 2 bios in each, that correspond to the bios in the main one.
@@ -4531,7 +4531,7 @@ read_more:
                return sectors_done;
        }
 
-       read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
+       read_bio = bio_alloc_bioset(GFP_KERNEL, RESYNC_PAGES, &mddev->bio_set);
 
        bio_set_dev(read_bio, rdev->bdev);
        read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
@@ -4539,10 +4539,6 @@ read_more:
        read_bio->bi_private = r10_bio;
        read_bio->bi_end_io = end_reshape_read;
        bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
-       read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-       read_bio->bi_status = 0;
-       read_bio->bi_vcnt = 0;
-       read_bio->bi_iter.bi_size = 0;
        r10_bio->master_bio = read_bio;
        r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index d0f540296fe9..e8c118e05dfd 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1037,7 +1037,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
        }
 
        /* flush the disk cache after recovery if necessary */
-       ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL);
+       ret = blkdev_issue_flush(rdev->bdev);
 out:
        __free_page(page);
        return ret;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3a90cc0e43ca..5d57a5bd171f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5310,7 +5310,7 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
        unsigned int chunk_sectors;
        unsigned int bio_sectors = bio_sectors(bio);
 
-       WARN_ON_ONCE(bio->bi_partno);
+       WARN_ON_ONCE(bio->bi_bdev->bd_partno);
 
        chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
        return chunk_sectors >=
@@ -5393,90 +5393,72 @@ static void raid5_align_endio(struct bio *bi)
 static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 {
        struct r5conf *conf = mddev->private;
-       int dd_idx;
-       struct bio* align_bi;
+       struct bio *align_bio;
        struct md_rdev *rdev;
-       sector_t end_sector;
+       sector_t sector, end_sector, first_bad;
+       int bad_sectors, dd_idx;
 
        if (!in_chunk_boundary(mddev, raid_bio)) {
                pr_debug("%s: non aligned\n", __func__);
                return 0;
        }
-       /*
-        * use bio_clone_fast to make a copy of the bio
-        */
-       align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
-       if (!align_bi)
-               return 0;
-       /*
-        * set bi_end_io to a new function, and set bi_private to the
-        * original bio.
-        */
-       align_bi->bi_end_io = raid5_align_endio;
-       align_bi->bi_private = raid_bio;
-       /*
-        * compute position
-        */
-       align_bi->bi_iter.bi_sector =
-               raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
-                                    0, &dd_idx, NULL);
-       end_sector = bio_end_sector(align_bi);
+       sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0,
+                                     &dd_idx, NULL);
+       end_sector = bio_end_sector(raid_bio);
 
        rcu_read_lock();
+       if (r5c_big_stripe_cached(conf, sector))
+               goto out_rcu_unlock;
+
        rdev = rcu_dereference(conf->disks[dd_idx].replacement);
        if (!rdev || test_bit(Faulty, &rdev->flags) ||
            rdev->recovery_offset < end_sector) {
                rdev = rcu_dereference(conf->disks[dd_idx].rdev);
-               if (rdev &&
-                   (test_bit(Faulty, &rdev->flags) ||
+               if (!rdev)
+                       goto out_rcu_unlock;
+               if (test_bit(Faulty, &rdev->flags) ||
                    !(test_bit(In_sync, &rdev->flags) ||
-                     rdev->recovery_offset >= end_sector)))
-                       rdev = NULL;
+                     rdev->recovery_offset >= end_sector))
+                       goto out_rcu_unlock;
        }
 
-       if (r5c_big_stripe_cached(conf, align_bi->bi_iter.bi_sector)) {
-               rcu_read_unlock();
-               bio_put(align_bi);
+       atomic_inc(&rdev->nr_pending);
+       rcu_read_unlock();
+
+       align_bio = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
+       bio_set_dev(align_bio, rdev->bdev);
+       align_bio->bi_end_io = raid5_align_endio;
+       align_bio->bi_private = raid_bio;
+       align_bio->bi_iter.bi_sector = sector;
+
+       raid_bio->bi_next = (void *)rdev;
+
+       if (is_badblock(rdev, sector, bio_sectors(align_bio), &first_bad,
+                       &bad_sectors)) {
+               bio_put(align_bio);
+               rdev_dec_pending(rdev, mddev);
                return 0;
        }
 
-       if (rdev) {
-               sector_t first_bad;
-               int bad_sectors;
-
-               atomic_inc(&rdev->nr_pending);
-               rcu_read_unlock();
-               raid_bio->bi_next = (void *)rdev;
-               bio_set_dev(align_bi, rdev->bdev);
-
-               if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
-                               bio_sectors(align_bi),
-                               &first_bad, &bad_sectors)) {
-                       bio_put(align_bi);
-                       rdev_dec_pending(rdev, mddev);
-                       return 0;
-               }
+       /* No reshape active, so we can trust rdev->data_offset */
+       align_bio->bi_iter.bi_sector += rdev->data_offset;
 
-               /* No reshape active, so we can trust rdev->data_offset */
-               align_bi->bi_iter.bi_sector += rdev->data_offset;
+       spin_lock_irq(&conf->device_lock);
+       wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0,
+                           conf->device_lock);
+       atomic_inc(&conf->active_aligned_reads);
+       spin_unlock_irq(&conf->device_lock);
 
-               spin_lock_irq(&conf->device_lock);
-               wait_event_lock_irq(conf->wait_for_quiescent,
-                                   conf->quiesce == 0,
-                                   conf->device_lock);
-               atomic_inc(&conf->active_aligned_reads);
-               spin_unlock_irq(&conf->device_lock);
+       if (mddev->gendisk)
+               trace_block_bio_remap(align_bio, disk_devt(mddev->gendisk),
+                                     raid_bio->bi_iter.bi_sector);
+       submit_bio_noacct(align_bio);
+       return 1;
 
-               if (mddev->gendisk)
-                       trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk),
-                                             raid_bio->bi_iter.bi_sector);
-               submit_bio_noacct(align_bi);
-               return 1;
-       } else {
-               rcu_read_unlock();
-               bio_put(align_bi);
-               return 0;
-       }
+out_rcu_unlock:
+       rcu_read_unlock();
+       return 0;
 }
 
 static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
@@ -7661,7 +7643,7 @@ static int raid5_run(struct mddev *mddev)
        }
 
        /* device size must be a multiple of chunk size */
-       mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
+       mddev->dev_sectors &= ~((sector_t)mddev->chunk_sectors - 1);
        mddev->resync_max_sectors = mddev->dev_sectors;
 
        if (mddev->degraded > dirty_parity_disks &&
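The bcache request.c and md.c hunks above converge on the same I/O accounting pattern: sample bio_start_io_acct() on the incoming bio while it still points at the front-end device, remember that device, remap the bio, and at completion charge the elapsed time back with bio_end_io_acct_remapped(). A minimal sketch of that shape follows; the structure and function names here are hypothetical stand-ins, and only bio_start_io_acct(), bio_end_io_acct_remapped() and bio_set_dev() are the real API the patch uses:

struct example_io {
        struct block_device *orig_bdev; /* device the bio arrived on */
        unsigned long start_time;       /* value from bio_start_io_acct() */
};

static void example_remap(struct bio *bio, struct example_io *io,
                          struct block_device *backing)
{
        io->orig_bdev = bio->bi_bdev;            /* save before remapping */
        io->start_time = bio_start_io_acct(bio); /* account on the front end */
        bio_set_dev(bio, backing);               /* redirect to backing device */
}

static void example_end_io(struct bio *bio, struct example_io *io)
{
        /* complete the accounting against the original device */
        bio_end_io_acct_remapped(bio, io->start_time, io->orig_bdev);
}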