Diffstat (limited to 'drivers/md')
29 files changed, 110 insertions(+), 457 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 921888df6764..30ba3573626c 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -27,7 +27,7 @@ config BLK_DEV_MD
 	  More information about Software RAID on Linux is contained in the
 	  Software RAID mini-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. There you will also learn
+	  <https://www.tldp.org/docs.html#howto>. There you will also learn
 	  where to get the supporting user space utilities raidtools.
 
 	  If unsure, say N.
@@ -71,7 +71,7 @@ config MD_RAID0
 	  Information about Software RAID on Linux is contained in the
 	  Software-RAID mini-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. There you will also
+	  <https://www.tldp.org/docs.html#howto>. There you will also
 	  learn where to get the supporting user space utilities raidtools.
 
 	  To compile this as a module, choose M here: the module
@@ -93,7 +93,7 @@ config MD_RAID1
 	  Information about Software RAID on Linux is contained in the
 	  Software-RAID mini-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. There you will also
+	  <https://www.tldp.org/docs.html#howto>. There you will also
 	  learn where to get the supporting user space utilities raidtools.
 
 	  If you want to use such a RAID-1 set, say Y.  To compile this code
@@ -148,7 +148,7 @@ config MD_RAID456
 	  Information about Software RAID on Linux is contained in the
 	  Software-RAID mini-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. There you will also
+	  <https://www.tldp.org/docs.html#howto>. There you will also
 	  learn where to get the supporting user space utilities raidtools.
 
 	  If you want to use such a RAID-4/RAID-5/RAID-6 set, say Y.  To
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 7de82c67597c..c7cadaafa947 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1232,36 +1232,8 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
 	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
 }
 
-static int cached_dev_congested(void *data, int bits)
-{
-	struct bcache_device *d = data;
-	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-	struct request_queue *q = bdev_get_queue(dc->bdev);
-	int ret = 0;
-
-	if (bdi_congested(q->backing_dev_info, bits))
-		return 1;
-
-	if (cached_dev_get(dc)) {
-		unsigned int i;
-		struct cache *ca;
-
-		for_each_cache(ca, d->c, i) {
-			q = bdev_get_queue(ca->bdev);
-			ret |= bdi_congested(q->backing_dev_info, bits);
-		}
-
-		cached_dev_put(dc);
-	}
-
-	return ret;
-}
-
 void bch_cached_dev_request_init(struct cached_dev *dc)
 {
-	struct gendisk *g = dc->disk.disk;
-
-	g->queue->backing_dev_info->congested_fn = cached_dev_congested;
 	dc->disk.cache_miss = cached_dev_cache_miss;
 	dc->disk.ioctl = cached_dev_ioctl;
 }
@@ -1345,27 +1317,8 @@ static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
 	return -ENOTTY;
 }
 
-static int flash_dev_congested(void *data, int bits)
-{
-	struct bcache_device *d = data;
-	struct request_queue *q;
-	struct cache *ca;
-	unsigned int i;
-	int ret = 0;
-
-	for_each_cache(ca, d->c, i) {
-		q = bdev_get_queue(ca->bdev);
-		ret |= bdi_congested(q->backing_dev_info, bits);
-	}
-
-	return ret;
-}
-
 void bch_flash_dev_request_init(struct bcache_device *d)
 {
-	struct gendisk *g = d->disk;
-
-	g->queue->backing_dev_info->congested_fn = flash_dev_congested;
 	d->cache_miss = flash_dev_cache_miss;
 	d->ioctl = flash_dev_ioctl;
 }
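The two bcache hunks above are representative of the whole series: every removal in this diff unhooks the same legacy pattern, in which a driver registered a congested_fn on its queue's backing_dev_info and answered congestion queries by polling bdi_congested() on each underlying device. A minimal sketch of that now-removed wiring, reconstructed from the hunks for illustration ("example_dev" is a hypothetical driver type, not from any one file):

	/* Legacy (pre-removal) congestion wiring; congested_fn and
	 * bdi_congested() are exactly the APIs this series deletes. */
	static int example_congested(void *data, int bits)
	{
		struct example_dev *dev = data;
		struct request_queue *q = bdev_get_queue(dev->bdev);

		/* forward the congestion query to the underlying device */
		return bdi_congested(q->backing_dev_info, bits);
	}

	static void example_request_init(struct example_dev *dev)
	{
		struct request_queue *q = dev->disk->queue;

		q->backing_dev_info->congested_data = dev;
		q->backing_dev_info->congested_fn = example_congested;
	}

With the callback gone, stacked drivers no longer answer congestion polls themselves; writeback relies on the block layer's own feedback instead.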
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a46698f08483..1bbdc410ee3c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -945,7 +945,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 		return -ENOMEM;
 
 	d->disk->queue = q;
-	q->backing_dev_info->congested_data = d;
 	q->limits.max_hw_sectors = UINT_MAX;
 	q->limits.max_sectors = UINT_MAX;
 	q->limits.max_segment_size = UINT_MAX;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 9eccced92896..96c93802ee4d 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -421,8 +421,6 @@ struct cache {
 
 	struct rw_semaphore quiesce_lock;
 
-	struct dm_target_callbacks callbacks;
-
 	/*
 	 * origin_blocks entries, discarded if set.
 	 */
@@ -2423,20 +2421,6 @@ static void set_cache_size(struct cache *cache, dm_cblock_t size)
 	cache->cache_size = size;
 }
 
-static int is_congested(struct dm_dev *dev, int bdi_bits)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-	return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
-static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-	struct cache *cache = container_of(cb, struct cache, callbacks);
-
-	return is_congested(cache->origin_dev, bdi_bits) ||
-		is_congested(cache->cache_dev, bdi_bits);
-}
-
 #define DEFAULT_MIGRATION_THRESHOLD 2048
 
 static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2471,9 +2455,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 		goto bad;
 	}
 
-	cache->callbacks.congested_fn = cache_is_congested;
-	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
-
 	cache->metadata_dev = ca->metadata_dev;
 	cache->origin_dev = ca->origin_dev;
 	cache->cache_dev = ca->cache_dev;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index 59ed8a67c2e3..bdb255edc200 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -68,7 +68,6 @@ struct hash_table_bucket;
 
 struct clone {
 	struct dm_target *ti;
-	struct dm_target_callbacks callbacks;
 
 	struct dm_dev *metadata_dev;
 	struct dm_dev *dest_dev;
@@ -1518,18 +1517,6 @@ error:
 	DMEMIT("Error");
 }
 
-static int clone_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-	struct request_queue *dest_q, *source_q;
-	struct clone *clone = container_of(cb, struct clone, callbacks);
-
-	source_q = bdev_get_queue(clone->source_dev->bdev);
-	dest_q = bdev_get_queue(clone->dest_dev->bdev);
-
-	return (bdi_congested(dest_q->backing_dev_info, bdi_bits) |
-		bdi_congested(source_q->backing_dev_info, bdi_bits));
-}
-
 static sector_t get_dev_size(struct dm_dev *dev)
 {
 	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
@@ -1930,8 +1917,6 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto out_with_mempool;
 
 	mutex_init(&clone->commit_lock);
-	clone->callbacks.congested_fn = clone_is_congested;
-	dm_table_add_target_callbacks(ti->table, &clone->callbacks);
 
 	/* Enable flushes */
 	ti->num_flush_bios = 1;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ad324abb8c49..b437a14c4942 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -300,7 +300,7 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc)
 * elephant: The extended version of eboiv with additional Elephant diffuser
 *           used with Bitlocker CBC mode.
 *           This mode was used in older Windows systems
- *           http://download.microsoft.com/download/0/2/3/0238acaf-d3bf-4a6d-b3d6-0a0be4bbb36e/bitlockercipher200608.pdf
+ *           https://download.microsoft.com/download/0/2/3/0238acaf-d3bf-4a6d-b3d6-0a0be4bbb36e/bitlockercipher200608.pdf
 */
 
 static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 566ddbdb16a4..b24e3839bb3a 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1137,7 +1137,6 @@ static int metadata_get_stats(struct era_metadata *md, void *ptr)
 
 struct era {
 	struct dm_target *ti;
-	struct dm_target_callbacks callbacks;
 
 	struct dm_dev *metadata_dev;
 	struct dm_dev *origin_dev;
@@ -1375,18 +1374,6 @@ static void stop_worker(struct era *era)
 /*----------------------------------------------------------------
  * Target methods
  *--------------------------------------------------------------*/
-static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-	return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
-static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-	struct era *era = container_of(cb, struct era, callbacks);
-	return dev_is_congested(era->origin_dev, bdi_bits);
-}
-
 static void era_destroy(struct era *era)
 {
 	if (era->md)
@@ -1514,8 +1501,6 @@ static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->flush_supported = true;
 
 	ti->num_discard_bios = 1;
-	era->callbacks.congested_fn = era_is_congested;
-	dm_table_add_target_callbacks(ti->table, &era->callbacks);
 
 	return 0;
 }
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index ae866e469e1b..5da3eb661e50 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2420,7 +2420,7 @@ static void integrity_writer(struct work_struct *w)
 	unsigned prev_free_sectors;
 
 	/* the following test is not needed, but it tests the replay code */
-	if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev)
+	if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev)
 		return;
 
 	spin_lock_irq(&ic->endio_wait.lock);
@@ -2481,7 +2481,7 @@ static void integrity_recalc(struct work_struct *w)
 
 next_chunk:
 
-	if (unlikely(dm_suspended(ic->ti)))
+	if (unlikely(dm_post_suspending(ic->ti)))
 		goto unlock_ret;
 
 	range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 81ffc59d05c9..4312007d2d34 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -306,7 +306,7 @@ static void do_region(int op, int op_flags, unsigned region,
 	struct request_queue *q = bdev_get_queue(where->bdev);
 	unsigned short logical_block_size = queue_logical_block_size(q);
 	sector_t num_sectors;
-	unsigned int uninitialized_var(special_cmd_max_sectors);
+	unsigned int special_cmd_max_sectors;
 
 	/*
 	 * Reject unsupported discard and write same requests.
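The dm-integrity hunks above switch two background workers from dm_suspended() to dm_post_suspending(), a helper this same diff introduces in dm.c below: it tests the new DMF_POST_SUSPENDING flag, which is set only around dm_table_postsuspend_targets(). A sketch of the intended usage, with a hypothetical "my_target" standing in for dm-integrity's types:

	/* Sketch: a DM target worker backing off during the post-suspend
	 * window.  dm_post_suspending() is added further down in this
	 * diff; "my_target" and its fields are illustrative only. */
	static void my_target_worker(struct work_struct *w)
	{
		struct my_target *t = container_of(w, struct my_target, work);

		/* DMF_POST_SUSPENDING is set only while postsuspend runs,
		 * so work queued before the suspend still executes; only
		 * work racing with postsuspend itself bails out. */
		if (unlikely(dm_post_suspending(t->ti)))
			return;

		/* ... deferred metadata work ... */
	}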
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 489935d5f22d..056d891a32a9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1844,7 +1844,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us
 	int ioctl_flags;
 	int param_flags;
 	unsigned int cmd;
-	struct dm_ioctl *uninitialized_var(param);
+	struct dm_ioctl *param;
 	ioctl_fn fn = NULL;
 	size_t input_param_size;
 	struct dm_ioctl param_kernel;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 10e8b2fe787b..d9e270957e18 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -242,7 +242,6 @@ struct raid_set {
 
 	struct mddev md;
 	struct raid_type *raid_type;
-	struct dm_target_callbacks callbacks;
 
 	sector_t array_sectors;
 	sector_t dev_sectors;
@@ -1705,13 +1704,6 @@ static void do_table_event(struct work_struct *ws)
 	dm_table_event(rs->ti->table);
 }
 
-static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
-{
-	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
-
-	return mddev_congested(&rs->md, bits);
-}
-
 /*
  * Make sure a valid takover (level switch) is being requested on @rs
  *
@@ -3248,9 +3240,6 @@ size_check:
 		goto bad_md_start;
 	}
 
-	rs->callbacks.congested_fn = raid_is_congested;
-	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
-
 	/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
 	if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
 		r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
@@ -3310,7 +3299,6 @@ static void raid_dtr(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
-	list_del_init(&rs->callbacks.list);
 	md_stop(&rs->md);
 	raid_set_free(rs);
 }
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 5aec1cd09348..7ce387a1cc6a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -146,10 +146,6 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
  */
 static void rq_completed(struct mapped_device *md)
 {
-	/* nudge anyone waiting on suspend queue */
-	if (unlikely(wq_has_sleeper(&md->wait)))
-		wake_up(&md->wait);
-
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
 	 */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 2d1d4a4c399c..63fab7c769be 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -613,7 +613,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
 						  chunk_t old, chunk_t new),
 			    void *callback_context)
 {
-	int r, uninitialized_var(new_snapshot);
+	int r, new_snapshot;
 	struct pstore *ps = get_info(store);
 
 	/*
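Several hunks in this diff (dm-io, dm-ioctl, dm-snap-persistent, dm-table, dm-writecache, raid5) drop uninitialized_var(), which was defined as roughly `#define uninitialized_var(x) x = x`: a self-assignment that silenced the compiler's "may be used uninitialized" warning without initializing anything. A standalone illustration of why the macro was removed tree-wide (not kernel code):

	#define uninitialized_var(x) x = x	/* the removed macro, roughly */

	int old_style(int flag)
	{
		int uninitialized_var(val);	/* warning silenced, bug kept */

		if (flag)
			val = 42;
		return val;	/* still returns garbage when flag == 0 */
	}

	int new_style(int flag)
	{
		int val;	/* let the compiler prove initialization... */

		if (flag)
			val = 42;
		else
			val = 0;	/* ...or make the default explicit */
		return val;
	}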
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8277b959e00b..5edc3079e7c1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -64,8 +64,6 @@ struct dm_table {
 	void *event_context;
 
 	struct dm_md_mempools *mempools;
-
-	struct list_head target_callbacks;
 };
 
 /*
@@ -190,7 +188,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&t->devices);
-	INIT_LIST_HEAD(&t->target_callbacks);
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -361,7 +358,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
  * This upgrades the mode on an already open dm_dev, being
  * careful to leave things as they were if we fail to reopen the
  * device and not to touch the existing bdev field in case
- * it is accessed concurrently inside dm_table_any_congested().
+ * it is accessed concurrently.
 */
 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 			struct mapped_device *md)
@@ -461,7 +458,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 		return 0;
 	}
 
-	if (bdev_stack_limits(limits, bdev, start) < 0)
+	if (blk_stack_limits(limits, &q->limits,
+			get_start_sect(bdev) + start) < 0)
 		DMWARN("%s: adding target device %s caused an alignment inconsistency: "
 		       "physical_block_size=%u, logical_block_size=%u, "
 		       "alignment_offset=%u, start=%llu",
@@ -470,9 +468,6 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 		       q->limits.logical_block_size,
 		       q->limits.alignment_offset,
 		       (unsigned long long) start << SECTOR_SHIFT);
-
-	limits->zoned = blk_queue_zoned_model(q);
-
 	return 0;
 }
 
@@ -642,7 +637,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
 	 */
 	unsigned short remaining = 0;
 
-	struct dm_target *uninitialized_var(ti);
+	struct dm_target *ti;
 	struct queue_limits ti_limits;
 	unsigned i;
 
@@ -1531,22 +1526,6 @@ combine_limits:
 			       dm_device_name(table->md),
 			       (unsigned long long) ti->begin,
 			       (unsigned long long) ti->len);
-
-		/*
-		 * FIXME: this should likely be moved to blk_stack_limits(), would
-		 * also eliminate limits->zoned stacking hack in dm_set_device_limits()
-		 */
-		if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) {
-			/*
-			 * By default, the stacked limits zoned model is set to
-			 * BLK_ZONED_NONE in blk_set_stacking_limits(). Update
-			 * this model using the first target model reported
-			 * that is not BLK_ZONED_NONE. This will be either the
-			 * first target device zoned model or the model reported
-			 * by the target .io_hints.
-			 */
-			limits->zoned = ti_limits.zoned;
-		}
 	}
 
 	/*
@@ -2052,38 +2031,6 @@ int dm_table_resume_targets(struct dm_table *t)
 	return 0;
 }
 
-void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
-{
-	list_add(&cb->list, &t->target_callbacks);
-}
-EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
-
-int dm_table_any_congested(struct dm_table *t, int bdi_bits)
-{
-	struct dm_dev_internal *dd;
-	struct list_head *devices = dm_table_get_devices(t);
-	struct dm_target_callbacks *cb;
-	int r = 0;
-
-	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
-		char b[BDEVNAME_SIZE];
-
-		if (likely(q))
-			r |= bdi_congested(q->backing_dev_info, bdi_bits);
-		else
-			DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
-				     dm_device_name(t->md),
-				     bdevname(dd->dm_dev->bdev, b));
-	}
-
-	list_for_each_entry(cb, &t->target_callbacks, list)
-		if (cb->congested_fn)
-			r |= cb->congested_fn(cb, bdi_bits);
-
-	return r;
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
 	return t->md;
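In the dm-table hunk above, dm_set_device_limits() stops calling bdev_stack_limits() and open-codes its effect: stack the component queue's limits onto the table's, offset by where the partition starts on the whole disk. A sketch of the equivalence (the wrapper shown here is a reconstruction of the removed helper, not current kernel code):

	/* What bdev_stack_limits() roughly did before its removal: */
	static inline int bdev_stack_limits(struct queue_limits *t,
					    struct block_device *bdev,
					    sector_t start)
	{
		struct request_queue *bq = bdev_get_queue(bdev);

		/* account for the partition offset within the whole disk */
		return blk_stack_limits(t, &bq->limits,
					get_start_sect(bdev) + start);
	}

The removed FIXME block follows the same logic: per the comment it carried, zoned-model stacking now happens in blk_stack_limits() itself, so DM no longer needs its local limits->zoned hack.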
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fe2de2888709..fff4c50df74d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -326,7 +326,6 @@ struct pool_c {
 	struct pool *pool;
 	struct dm_dev *data_dev;
 	struct dm_dev *metadata_dev;
-	struct dm_target_callbacks callbacks;
 
 	dm_block_t low_water_blocks;
 	struct pool_features requested_pf; /* Features requested during table load */
@@ -2796,18 +2795,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 	}
 }
 
-static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
-{
-	struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
-	struct request_queue *q;
-
-	if (get_pool_mode(pt->pool) == PM_OUT_OF_DATA_SPACE)
-		return 1;
-
-	q = bdev_get_queue(pt->data_dev->bdev);
-	return bdi_congested(q->backing_dev_info, bdi_bits);
-}
-
 static void requeue_bios(struct pool *pool)
 {
 	struct thin_c *tc;
@@ -3420,9 +3407,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	dm_pool_register_pre_commit_callback(pool->pmd,
 					     metadata_pre_commit_callback, pool);
 
-	pt->callbacks.congested_fn = pool_is_congested;
-	dm_table_add_target_callbacks(ti->table, &pt->callbacks);
-
 	mutex_unlock(&dm_thin_pool_table.mutex);
 
 	return 0;
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 44004f07b4a1..cfea054863a4 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1752,7 +1752,7 @@ static void writecache_writeback(struct work_struct *work)
 {
 	struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
 	struct blk_plug plug;
-	struct wc_entry *f, *uninitialized_var(g), *e = NULL;
+	struct wc_entry *f, *g, *e = NULL;
 	struct rb_node *node, *next_node;
 	struct list_head skipped;
 	struct writeback_list wbl;
@@ -2266,6 +2266,12 @@ invalid_optional:
 	}
 
 	if (WC_MODE_PMEM(wc)) {
+		if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
+			r = -EOPNOTSUPP;
+			ti->error = "Asynchronous persistent memory not supported as pmem cache";
+			goto bad;
+		}
+
 		r = persistent_memory_claim(wc);
 		if (r) {
 			ti->error = "Unable to map persistent memory for cache";
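The dm-writecache hunk above adds an early constructor failure when the cache device's DAX instance is not synchronous. As I understand the change, pmem mode persists data with CPU cache flushes alone, which is only safe when the DAX device guarantees durability without a separate flush mechanism; dax_synchronous() reports exactly that property. The gate in isolation (hypothetical "my_wc" context, mirroring the hunk):

	/* Sketch: refuse non-synchronous DAX for a pmem-backed cache. */
	static int check_pmem_cache(struct my_wc *wc, struct dm_target *ti)
	{
		if (!dax_synchronous(wc->ssd_dev->dax_dev)) {
			ti->error = "Asynchronous persistent memory not supported as pmem cache";
			return -EOPNOTSUPP;
		}
		return 0;
	}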
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 5cf6f5f552e0..b298fefb022e 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -2217,8 +2217,15 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx,
 {
 	struct list_head *list;
 	struct dm_zone *zone;
-	int i = 0;
+	int i;
+
+	/* Schedule reclaim to ensure free zones are available */
+	if (!(flags & DMZ_ALLOC_RECLAIM)) {
+		for (i = 0; i < zmd->nr_devs; i++)
+			dmz_schedule_reclaim(zmd->dev[i].reclaim);
+	}
+	i = 0;
 
 again:
 	if (flags & DMZ_ALLOC_CACHE)
 		list = &zmd->unmap_cache_list;
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index dd1eebf6e50f..9c0ecc9568a4 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -456,6 +456,8 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
 		nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
 		nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
 	}
+	if (nr_unmap <= 1)
+		return 0;
 	return nr_unmap * 100 / nr_zones;
 }
 
@@ -501,7 +503,7 @@ static void dmz_reclaim_work(struct work_struct *work)
 {
 	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
 	struct dmz_metadata *zmd = zrc->metadata;
-	unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
+	unsigned int p_unmap;
 	int ret;
 
 	if (dmz_dev_is_dying(zmd))
@@ -527,9 +529,6 @@ static void dmz_reclaim_work(struct work_struct *work)
 		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
 	}
 
-	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
-	nr_rnd = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
-
 	DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
 		dmz_metadata_label(zmd), zrc->dev_idx,
 		zrc->kc_throttle.throttle,
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 7fcf198547d5..697f9de37355 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -400,15 +400,7 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
 	struct dmz_metadata *zmd = dmz->metadata;
 	struct dm_zone *zone;
-	int i, ret;
-
-	/*
-	 * Write may trigger a zone allocation. So make sure the
-	 * allocation can succeed.
-	 */
-	if (bio_op(bio) == REQ_OP_WRITE)
-		for (i = 0; i < dmz->nr_ddevs; i++)
-			dmz_schedule_reclaim(dmz->dev[i].reclaim);
+	int ret;
 
 	dmz_lock_metadata(zmd);
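Taken together, the three dm-zoned hunks move reclaim scheduling from the write path (dmz_handle_bio) into the allocator (dmz_alloc_zone), so any allocation that could exhaust free zones kicks reclaim, and they make dmz_reclaim_percentage() report zero headroom once at most one unmapped zone remains. The percentage guard in isolation (an illustrative rewrite of the hunk above, not the kernel function):

	/* Report 0% free when the pool is effectively exhausted, so the
	 * caller treats reclaim as mandatory rather than optional. */
	static unsigned int percent_free(unsigned int nr_unmap,
					 unsigned int nr_zones)
	{
		if (nr_unmap <= 1)
			return 0;
		return nr_unmap * 100 / nr_zones;
	}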
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a02842afe358..87cf45f619fd 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
@@ -142,6 +143,7 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_DEFERRED_REMOVE 6
 #define DMF_SUSPENDED_INTERNALLY 7
+#define DMF_POST_SUSPENDING 8
 
 #define DM_NUMA_NODE NUMA_NO_NODE
 static int dm_numa_node = DM_NUMA_NODE;
@@ -654,28 +656,6 @@ static void free_tio(struct dm_target_io *tio)
 	bio_put(&tio->clone);
 }
 
-static bool md_in_flight_bios(struct mapped_device *md)
-{
-	int cpu;
-	struct hd_struct *part = &dm_disk(md)->part0;
-	long sum = 0;
-
-	for_each_possible_cpu(cpu) {
-		sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
-		sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
-	}
-
-	return sum != 0;
-}
-
-static bool md_in_flight(struct mapped_device *md)
-{
-	if (queue_is_mq(md->queue))
-		return blk_mq_queue_inflight(md->queue);
-	else
-		return md_in_flight_bios(md);
-}
-
 u64 dm_start_time_ns_from_clone(struct bio *bio)
 {
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
@@ -1834,31 +1814,6 @@ static blk_qc_t dm_submit_bio(struct bio *bio)
 	return ret;
 }
 
-static int dm_any_congested(void *congested_data, int bdi_bits)
-{
-	int r = bdi_bits;
-	struct mapped_device *md = congested_data;
-	struct dm_table *map;
-
-	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		if (dm_request_based(md)) {
-			/*
-			 * With request-based DM we only need to check the
-			 * top-level queue for congestion.
-			 */
-			struct backing_dev_info *bdi = md->queue->backing_dev_info;
-			r = bdi->wb.congested->state & bdi_bits;
-		} else {
-			map = dm_get_live_table_fast(md);
-			if (map)
-				r = dm_table_any_congested(map, bdi_bits);
-			dm_put_live_table_fast(md);
-		}
-	}
-
-	return r;
-}
-
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
@@ -2297,12 +2252,6 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
-static void dm_init_congested_fn(struct mapped_device *md)
-{
-	md->queue->backing_dev_info->congested_data = md;
-	md->queue->backing_dev_info->congested_fn = dm_any_congested;
-}
-
 /*
  * Setup the DM device's queue based on md's type
 */
@@ -2319,12 +2268,10 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
 			return r;
 		}
-		dm_init_congested_fn(md);
 		break;
 	case DM_TYPE_BIO_BASED:
 	case DM_TYPE_DAX_BIO_BASED:
 	case DM_TYPE_NVME_BIO_BASED:
-		dm_init_congested_fn(md);
 		break;
 	case DM_TYPE_NONE:
 		WARN_ON_ONCE(true);
@@ -2424,6 +2371,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 		if (!dm_suspended_md(md)) {
 			dm_table_presuspend_targets(map);
 			set_bit(DMF_SUSPENDED, &md->flags);
+			set_bit(DMF_POST_SUSPENDING, &md->flags);
 			dm_table_postsuspend_targets(map);
 		}
 		/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
@@ -2464,15 +2412,29 @@ void dm_put(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static bool md_in_flight_bios(struct mapped_device *md)
+{
+	int cpu;
+	struct hd_struct *part = &dm_disk(md)->part0;
+	long sum = 0;
+
+	for_each_possible_cpu(cpu) {
+		sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
+		sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
+	}
+
+	return sum != 0;
+}
+
+static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
 {
 	int r = 0;
 	DEFINE_WAIT(wait);
 
-	while (1) {
+	while (true) {
 		prepare_to_wait(&md->wait, &wait, task_state);
 
-		if (!md_in_flight(md))
+		if (!md_in_flight_bios(md))
 			break;
 
 		if (signal_pending_state(task_state, current)) {
@@ -2487,6 +2449,28 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state)
 	return r;
 }
 
+static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+{
+	int r = 0;
+
+	if (!queue_is_mq(md->queue))
+		return dm_wait_for_bios_completion(md, task_state);
+
+	while (true) {
+		if (!blk_mq_queue_inflight(md->queue))
+			break;
+
+		if (signal_pending_state(task_state, current)) {
+			r = -EINTR;
+			break;
+		}
+
+		msleep(5);
+	}
+
+	return r;
+}
+
 /*
  * Process the deferred bios
  */
@@ -2746,7 +2730,9 @@ retry:
 	if (r)
 		goto out_unlock;
 
+	set_bit(DMF_POST_SUSPENDING, &md->flags);
 	dm_table_postsuspend_targets(map);
+	clear_bit(DMF_POST_SUSPENDING, &md->flags);
 
 out_unlock:
 	mutex_unlock(&md->suspend_lock);
@@ -2843,7 +2829,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
 	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
 			    DMF_SUSPENDED_INTERNALLY);
 
+	set_bit(DMF_POST_SUSPENDING, &md->flags);
 	dm_table_postsuspend_targets(map);
+	clear_bit(DMF_POST_SUSPENDING, &md->flags);
 }
 
 static void __dm_internal_resume(struct mapped_device *md)
@@ -2920,17 +2908,25 @@ EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 		      unsigned cookie)
 {
+	int r;
+	unsigned noio_flag;
 	char udev_cookie[DM_COOKIE_LENGTH];
 	char *envp[] = { udev_cookie, NULL };
 
+	noio_flag = memalloc_noio_save();
+
 	if (!cookie)
-		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+		r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 	else {
 		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
 			 DM_COOKIE_ENV_VAR_NAME, cookie);
-		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
-					  action, envp);
+		r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+				       action, envp);
 	}
+
+	memalloc_noio_restore(noio_flag);
+
+	return r;
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@ -2996,6 +2992,11 @@ int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+static int dm_post_suspending_md(struct mapped_device *md)
+{
+	return test_bit(DMF_POST_SUSPENDING, &md->flags);
+}
+
 int dm_suspended_internally_md(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
@@ -3012,6 +3013,12 @@ int dm_suspended(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_suspended);
 
+int dm_post_suspending(struct dm_target *ti)
+{
+	return dm_post_suspending_md(dm_table_get_md(ti->table));
+}
+EXPORT_SYMBOL_GPL(dm_post_suspending);
+
 int dm_noflush_suspending(struct dm_target *ti)
 {
 	return __noflush_suspending(dm_table_get_md(ti->table));
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index d7c4f6606b5f..4f5fe664d05a 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -63,7 +63,6 @@ void dm_table_presuspend_targets(struct dm_table *t);
 void dm_table_presuspend_undo_targets(struct dm_table *t);
 void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
-int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 enum dm_queue_mode dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
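The dm_kobject_uevent() change above brackets the uevent emission with memalloc_noio_save()/memalloc_noio_restore() (hence the new linux/sched/mm.h include), so every allocation made inside the scope is implicitly GFP_NOIO and the uevent path cannot recurse into block I/O against a device that is mid-suspend. The bracket pattern in isolation (a generic sketch, not the DM code):

	#include <linux/sched/mm.h>

	static void send_change_event_noio(struct kobject *kobj)
	{
		unsigned int noio_flag;

		noio_flag = memalloc_noio_save();  /* allocations now GFP_NOIO */
		kobject_uevent(kobj, KOBJ_CHANGE);
		memalloc_noio_restore(noio_flag);  /* restore previous scope */
	}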
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 8efada3ee16f..c2ae9125c4c3 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -46,29 +46,6 @@ static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
 	return conf->disks + lo;
 }
 
-/*
- * In linear_congested() conf->raid_disks is used as a copy of
- * mddev->raid_disks to iterate conf->disks[], because conf->raid_disks
- * and conf->disks[] are created in linear_conf(), they are always
- * consitent with each other, but mddev->raid_disks does not.
- */
-static int linear_congested(struct mddev *mddev, int bits)
-{
-	struct linear_conf *conf;
-	int i, ret = 0;
-
-	rcu_read_lock();
-	conf = rcu_dereference(mddev->private);
-
-	for (i = 0; i < conf->raid_disks && !ret ; i++) {
-		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
-		ret |= bdi_congested(q->backing_dev_info, bits);
-	}
-
-	rcu_read_unlock();
-	return ret;
-}
-
 static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
 	struct linear_conf *conf;
@@ -322,7 +299,6 @@ static struct md_personality linear_personality =
 	.hot_add_disk	= linear_add,
 	.size		= linear_size,
 	.quiesce	= linear_quiesce,
-	.congested	= linear_congested,
 };
 
 static int __init linear_init (void)
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 277fdfd9ee54..776bbe542db5 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -151,28 +151,6 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
 	seq_putc(seq, ']');
 }
 
-static int multipath_congested(struct mddev *mddev, int bits)
-{
-	struct mpconf *conf = mddev->private;
-	int i, ret = 0;
-
-	rcu_read_lock();
-	for (i = 0; i < mddev->raid_disks ; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-			ret |= bdi_congested(q->backing_dev_info, bits);
-			/* Just like multipath_map, we just check the
-			 * first available device
-			 */
-			break;
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 /*
  * Careful, this can execute in IRQ contexts as well!
 */
@@ -478,7 +456,6 @@ static struct md_personality multipath_personality =
 	.hot_add_disk	= multipath_add_disk,
 	.hot_remove_disk= multipath_remove_disk,
 	.size		= multipath_size,
-	.congested	= multipath_congested,
 };
 
 static int __init multipath_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6b511c9007d3..e6b6e5192291 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -579,26 +579,6 @@ void mddev_resume(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_resume);
 
-int mddev_congested(struct mddev *mddev, int bits)
-{
-	struct md_personality *pers = mddev->pers;
-	int ret = 0;
-
-	rcu_read_lock();
-	if (mddev->suspended)
-		ret = 1;
-	else if (pers && pers->congested)
-		ret = pers->congested(mddev, bits);
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(mddev_congested);
-static int md_congested(void *data, int bits)
-{
-	struct mddev *mddev = data;
-	return mddev_congested(mddev, bits);
-}
-
 /*
  * Generic flush handling for md
 */
@@ -5761,6 +5741,7 @@ static int md_alloc(dev_t dev, char *name)
 	 * remove it now.
 	 */
 	disk->flags |= GENHD_FL_EXT_DEVT;
+	disk->events |= DISK_EVENT_MEDIA_CHANGE;
 	mddev->gendisk = disk;
 	/* As soon as we call add_disk(), another thread could get
 	 * through to md_open, so make sure it doesn't get too far
@@ -6056,8 +6037,6 @@ int md_run(struct mddev *mddev)
 			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
 		else
 			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
-		mddev->queue->backing_dev_info->congested_data = mddev;
-		mddev->queue->backing_dev_info->congested_fn = md_congested;
 	}
 	if (pers->sync_request) {
 		if (mddev->kobj.sd &&
@@ -6444,7 +6423,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
 
 		__md_stop_writes(mddev);
 		__md_stop(mddev);
-		mddev->queue->backing_dev_info->congested_fn = NULL;
 
 		/* tell userspace to handle 'inactive' */
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -7902,20 +7880,17 @@ static void md_release(struct gendisk *disk, fmode_t mode)
 	mddev_put(mddev);
 }
 
-static int md_media_changed(struct gendisk *disk)
-{
-	struct mddev *mddev = disk->private_data;
-
-	return mddev->changed;
-}
-
-static int md_revalidate(struct gendisk *disk)
+static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing)
 {
 	struct mddev *mddev = disk->private_data;
+	unsigned int ret = 0;
 
+	if (mddev->changed)
+		ret = DISK_EVENT_MEDIA_CHANGE;
 	mddev->changed = 0;
-	return 0;
+	return ret;
 }
+
 static const struct block_device_operations md_fops =
 {
 	.owner		= THIS_MODULE,
@@ -7927,8 +7902,7 @@ static const struct block_device_operations md_fops =
 	.compat_ioctl	= md_compat_ioctl,
 #endif
 	.getgeo		= md_getgeo,
-	.media_changed	= md_media_changed,
-	.revalidate_disk= md_revalidate,
+	.check_events	= md_check_events,
 };
 
 static int md_thread(void *arg)
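The md.c hunks above convert md from the removed ->media_changed/->revalidate_disk block_device_operations hooks to the events interface: the gendisk advertises DISK_EVENT_MEDIA_CHANGE at allocation time, and ->check_events() reports and clears the pending change. The same conversion pattern for a hypothetical driver "foo" (names and the flag bit are illustrative):

	static unsigned int foo_check_events(struct gendisk *disk,
					     unsigned int clearing)
	{
		struct foo_dev *foo = disk->private_data;

		/* report-and-clear, replacing the old media_changed() hook */
		if (test_and_clear_bit(FOO_MEDIA_CHANGED, &foo->flags))
			return DISK_EVENT_MEDIA_CHANGE;
		return 0;
	}

	static const struct block_device_operations foo_fops = {
		.owner		= THIS_MODULE,
		.check_events	= foo_check_events,
	};

	/* at probe time: disk->events |= DISK_EVENT_MEDIA_CHANGE; */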
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1e172b3e499f..f79b5b4101ef 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -604,9 +604,6 @@ struct md_personality
 	 * array.
 	 */
 	void *(*takeover) (struct mddev *mddev);
-	/* congested implements bdi.congested_fn().
-	 * Will not be called while array is 'suspended' */
-	int (*congested)(struct mddev *mddev, int bits);
 	/* Changes the consistency policy of an active array.
 	 */
 	int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
 };
@@ -717,7 +714,6 @@ extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
 extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
 extern void md_finish_reshape(struct mddev *mddev);
 
-extern int mddev_congested(struct mddev *mddev, int bits);
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
 extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 			   sector_t sector, int size, struct page *page);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e9e91c8d8afc..f54a449f97aa 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -29,21 +29,6 @@ module_param(default_layout, int, 0644);
 	 (1L << MD_HAS_PPL) |		\
 	 (1L << MD_HAS_MULTIPLE_PPLS))
 
-static int raid0_congested(struct mddev *mddev, int bits)
-{
-	struct r0conf *conf = mddev->private;
-	struct md_rdev **devlist = conf->devlist;
-	int raid_disks = conf->strip_zone[0].nb_dev;
-	int i, ret = 0;
-
-	for (i = 0; i < raid_disks && !ret ; i++) {
-		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
-
-		ret |= bdi_congested(q->backing_dev_info, bits);
-	}
-	return ret;
-}
-
 /*
  * inform the user of the raid configuration
 */
@@ -818,7 +803,6 @@ static struct md_personality raid0_personality=
 	.size		= raid0_size,
 	.takeover	= raid0_takeover,
 	.quiesce	= raid0_quiesce,
-	.congested	= raid0_congested,
 };
 
 static int __init raid0_init (void)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2aa2649cca66..960d854c07f8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -786,36 +786,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	return best_disk;
 }
 
-static int raid1_congested(struct mddev *mddev, int bits)
-{
-	struct r1conf *conf = mddev->private;
-	int i, ret = 0;
-
-	if ((bits & (1 << WB_async_congested)) &&
-	    conf->pending_count >= max_queued_requests)
-		return 1;
-
-	rcu_read_lock();
-	for (i = 0; i < conf->raid_disks * 2; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-			BUG_ON(!q);
-
-			/* Note the '|| 1' - when read_balance prefers
-			 * non-congested targets, it can be removed
-			 */
-			if ((bits & (1 << WB_async_congested)) || 1)
-				ret |= bdi_congested(q->backing_dev_info, bits);
-			else
-				ret &= bdi_congested(q->backing_dev_info, bits);
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 {
 	/* flush any pending bitmap writes to disk before proceeding w/ I/O */
@@ -3396,7 +3366,6 @@ static struct md_personality raid1_personality =
 	.check_reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
 	.takeover	= raid1_takeover,
-	.congested	= raid1_congested,
 };
 
 static int __init raid_init(void)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index cefda2abd34f..e8fa32733917 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -848,31 +848,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 	return rdev;
 }
 
-static int raid10_congested(struct mddev *mddev, int bits)
-{
-	struct r10conf *conf = mddev->private;
-	int i, ret = 0;
-
-	if ((bits & (1 << WB_async_congested)) &&
-	    conf->pending_count >= max_queued_requests)
-		return 1;
-
-	rcu_read_lock();
-	for (i = 0;
-	     (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
-		     && ret == 0;
-	     i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct request_queue *q = bdev_get_queue(rdev->bdev);
-
-			ret |= bdi_congested(q->backing_dev_info, bits);
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static void flush_pending_writes(struct r10conf *conf)
 {
 	/* Any writes that have been queued but are awaiting
@@ -4937,7 +4912,6 @@ static struct md_personality raid10_personality =
 	.start_reshape	= raid10_start_reshape,
 	.finish_reshape	= raid10_finish_reshape,
 	.update_reshape_pos = raid10_update_reshape_pos,
-	.congested	= raid10_congested,
 };
 
 static int __init raid_init(void)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 657634a7e8d1..fb8d1fb14088 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2620,7 +2620,7 @@ static void raid5_end_write_request(struct bio *bi)
 	struct stripe_head *sh = bi->bi_private;
 	struct r5conf *conf = sh->raid_conf;
 	int disks = sh->disks, i;
-	struct md_rdev *uninitialized_var(rdev);
+	struct md_rdev *rdev;
 	sector_t first_bad;
 	int bad_sectors;
 	int replacement = 0;
@@ -5127,28 +5127,6 @@ static void activate_bit_delay(struct r5conf *conf,
 	}
 }
 
-static int raid5_congested(struct mddev *mddev, int bits)
-{
-	struct r5conf *conf = mddev->private;
-
-	/* No difference between reads and writes.  Just check
-	 * how busy the stripe_cache is
-	 */
-
-	if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
-		return 1;
-
-	/* Also checks whether there is pressure on r5cache log space */
-	if (test_bit(R5C_LOG_TIGHT, &conf->cache_state))
-		return 1;
-	if (conf->quiesce)
-		return 1;
-	if (atomic_read(&conf->empty_inactive_list_nr))
-		return 1;
-
-	return 0;
-}
-
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
 	struct r5conf *conf = mddev->private;
@@ -8533,7 +8511,6 @@ static struct md_personality raid6_personality =
 	.finish_reshape	= raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid6_takeover,
-	.congested	= raid5_congested,
 	.change_consistency_policy = raid5_change_consistency_policy,
 };
 static struct md_personality raid5_personality =
@@ -8558,7 +8535,6 @@
 	.finish_reshape	= raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid5_takeover,
-	.congested	= raid5_congested,
 	.change_consistency_policy = raid5_change_consistency_policy,
 };
@@ -8584,7 +8560,6 @@ static struct md_personality raid4_personality =
 	.finish_reshape	= raid5_finish_reshape,
 	.quiesce	= raid5_quiesce,
 	.takeover	= raid4_takeover,
-	.congested	= raid5_congested,
 	.change_consistency_policy = raid5_change_consistency_policy,
 };