diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-cache-metadata.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-cleaner.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 17 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-exception-store.h | 5 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-snap-persistent.c | 30 | ||||
-rw-r--r-- | drivers/md/dm-snap-transient.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 14 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 6 | ||||
-rw-r--r-- | drivers/md/dm.c | 11 | ||||
-rw-r--r-- | drivers/md/md.c | 8 | ||||
-rw-r--r-- | drivers/md/multipath.c | 3 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree-remove.c | 17 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 2 | ||||
-rw-r--r-- | drivers/md/raid0.c | 12 | ||||
-rw-r--r-- | drivers/md/raid1.c | 28 | ||||
-rw-r--r-- | drivers/md/raid10.c | 52 | ||||
-rw-r--r-- | drivers/md/raid5.c | 17 |
20 files changed, 159 insertions, 82 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index e51de52eeb94..48b5890c28e3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1997,7 +1997,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) ret = bitmap_storage_alloc(&store, chunks, !bitmap->mddev->bitmap_info.external, - bitmap->cluster_slot); + mddev_is_clustered(bitmap->mddev) + ? bitmap->cluster_slot : 0); if (ret) goto err; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 20cc36b01b77..0a17d1b91a81 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -634,10 +634,10 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super = dm_block_data(sblock); + disk_super->flags = cpu_to_le32(cmd->flags); if (mutator) update_flags(disk_super, mutator); - disk_super->flags = cpu_to_le32(cmd->flags); disk_super->mapping_root = cpu_to_le64(cmd->root); disk_super->hint_root = cpu_to_le64(cmd->hint_root); disk_super->discard_root = cpu_to_le64(cmd->discard_root); diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index 240c9f0e85e7..8a096456579b 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -436,7 +436,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", .version = {1, 0, 0}, - .hint_size = 0, + .hint_size = 4, .owner = THIS_MODULE, .create = wb_create }; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d60c88df5234..4b3b6f8aff0c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -968,7 +968,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations + * This should never violate the device limitations (but only because + * max_segment_size is being constrained to PAGE_SIZE). * * This function may be called concurrently. If we allocate from the mempool * concurrently, there is a possibility of deadlock. For example, if we have @@ -2045,9 +2046,20 @@ static int crypt_iterate_devices(struct dm_target *ti, return fn(ti, cc->dev, cc->start, ti->len, data); } +static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + /* + * Unfortunate constraint that is required to avoid the potential + * for exceeding underlying device's max_segments limits -- due to + * crypt_alloc_buffer() possibly allocating pages for the encryption + * bio that are not as physically contiguous as the original bio. + */ + limits->max_segment_size = PAGE_SIZE; +} + static struct target_type crypt_target = { .name = "crypt", - .version = {1, 14, 0}, + .version = {1, 14, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -2058,6 +2070,7 @@ static struct target_type crypt_target = { .resume = crypt_resume, .message = crypt_message, .iterate_devices = crypt_iterate_devices, + .io_hints = crypt_io_hints, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index ebaa4f803eec..192bb8beeb6b 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -203,7 +203,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, return -EINVAL; } - tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL); + tmp_store = kzalloc(sizeof(*tmp_store), GFP_KERNEL); if (!tmp_store) { ti->error = "Exception store allocation failed"; return -ENOMEM; @@ -215,7 +215,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, else if (persistent == 'N') type = get_type("N"); else { - ti->error = "Persistent flag is not P or N"; + ti->error = "Exception store type is not P or N"; r = -EINVAL; goto bad_type; } @@ -233,7 +233,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, if (r) goto bad; - r = type->ctr(tmp_store, 0, NULL); + r = type->ctr(tmp_store, (strlen(argv[0]) > 1 ? &argv[0][1] : NULL)); if (r) { ti->error = "Exception store type constructor failed"; goto bad; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 0b2536247cf5..fae34e7a0b1e 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -42,8 +42,7 @@ struct dm_exception_store_type { const char *name; struct module *module; - int (*ctr) (struct dm_exception_store *store, - unsigned argc, char **argv); + int (*ctr) (struct dm_exception_store *store, char *options); /* * Destroys this object when you've finished with it. @@ -123,6 +122,8 @@ struct dm_exception_store { unsigned chunk_shift; void *context; + + bool userspace_supports_overflow; }; /* diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 97e165183e79..a0901214aef5 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -329,8 +329,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) */ if (min_region_size > (1 << 13)) { /* If not a power of 2, make it the next power of 2 */ - if (min_region_size & (min_region_size - 1)) - region_size = 1 << fls(region_size); + region_size = roundup_pow_of_two(min_region_size); DMINFO("Choosing default region size of %lu sectors", region_size); } else { diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index bf71583296f7..117a05e40090 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -7,6 +7,7 @@ #include "dm-exception-store.h" +#include <linux/ctype.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/vmalloc.h> @@ -843,10 +844,10 @@ static void persistent_drop_snapshot(struct dm_exception_store *store) DMWARN("write header failed"); } -static int persistent_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) +static int persistent_ctr(struct dm_exception_store *store, char *options) { struct pstore *ps; + int r; /* allocate the pstore */ ps = kzalloc(sizeof(*ps), GFP_KERNEL); @@ -868,14 +869,32 @@ static int persistent_ctr(struct dm_exception_store *store, ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0); if (!ps->metadata_wq) { - kfree(ps); DMERR("couldn't start header metadata update thread"); - return -ENOMEM; + r = -ENOMEM; + goto err_workqueue; + } + + if (options) { + char overflow = toupper(options[0]); + if (overflow == 'O') + store->userspace_supports_overflow = true; + else { + DMERR("Unsupported persistent store option: %s", options); + r = -EINVAL; + goto err_options; + } } store->context = ps; return 0; + +err_options: + destroy_workqueue(ps->metadata_wq); +err_workqueue: + kfree(ps); + + return r; } static unsigned persistent_status(struct dm_exception_store *store, @@ -888,7 +907,8 @@ static unsigned persistent_status(struct dm_exception_store *store, case STATUSTYPE_INFO: break; case STATUSTYPE_TABLE: - DMEMIT(" P %llu", (unsigned long long)store->chunk_size); + DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P", + (unsigned long long)store->chunk_size); } return sz; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 1ce9a2586e41..9b7c8c8049d6 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -70,8 +70,7 @@ static void transient_usage(struct dm_exception_store *store, *metadata_sectors = 0; } -static int transient_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) +static int transient_ctr(struct dm_exception_store *store, char *options) { struct transient_c *tc; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c0bcd6516dfe..c06b74e91cd6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1098,7 +1098,7 @@ static void stop_merge(struct dm_snapshot *s) } /* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> + * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> */ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -1302,6 +1302,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, u.store_swap = snap_dest->store; snap_dest->store = snap_src->store; + snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow; snap_src->store = u.store_swap; snap_dest->store->snap = snap_dest; @@ -1739,8 +1740,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) pe = __find_pending_exception(s, pe, chunk); if (!pe) { - s->snapshot_overflowed = 1; - DMERR("Snapshot overflowed: Unable to allocate exception."); + if (s->store->userspace_supports_overflow) { + s->snapshot_overflowed = 1; + DMERR("Snapshot overflowed: Unable to allocate exception."); + } else + __invalidate_snapshot(s, -ENOMEM); r = -EIO; goto out_unlock; } @@ -2365,7 +2369,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 14, 0}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2379,7 +2383,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6578b7bc1fbb..3897b90bd462 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3201,7 +3201,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) metadata_low_callback, pool); if (r) - goto out_free_pt; + goto out_flags_changed; pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); @@ -4249,6 +4249,10 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; struct pool *pool = tc->pool; + struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md); + + if (!pool_limits->discard_granularity) + return; /* pool's discard support is disabled */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6264781dc69a..1b5c6047e4f1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1001,6 +1001,7 @@ static void end_clone_bio(struct bio *clone) struct dm_rq_target_io *tio = info->tio; struct bio *bio = info->orig; unsigned int nr_bytes = info->orig->bi_iter.bi_size; + int error = clone->bi_error; bio_put(clone); @@ -1011,13 +1012,13 @@ static void end_clone_bio(struct bio *clone) * the remainder. */ return; - else if (bio->bi_error) { + else if (error) { /* * Don't notice the error to the upper layer yet. * The error handling decision is made by the target driver, * when the request is completed. */ - tio->error = bio->bi_error; + tio->error = error; return; } @@ -2837,8 +2838,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - map = dm_get_live_table(md, &srcu_idx); - spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); @@ -2852,14 +2851,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait) * do not race with internal suspend. */ mutex_lock(&md->suspend_lock); + map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } - mutex_unlock(&md->suspend_lock); - /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); + mutex_unlock(&md->suspend_lock); /* * Rare, but there may be I/O requests still going to complete, diff --git a/drivers/md/md.c b/drivers/md/md.c index 4f5ecbe94ccb..3fe3d04a968a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5409,9 +5409,13 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) * which will now never happen */ wake_up_process(mddev->sync_thread->tsk); + if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags)) + return -EBUSY; mddev_unlock(mddev); wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); @@ -8036,8 +8040,7 @@ static int remove_and_add_spares(struct mddev *mddev, !test_bit(Bitmap_sync, &rdev->flags))) continue; - if (rdev->saved_raid_disk < 0) - rdev->recovery_offset = 0; + rdev->recovery_offset = 0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) @@ -8160,6 +8163,7 @@ void md_check_recovery(struct mddev *mddev) md_reap_sync_thread(mddev); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); goto unlock; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d222522c52e0..d132f06afdd1 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -470,8 +470,7 @@ static int multipath_run (struct mddev *mddev) return 0; out_free_conf: - if (conf->pool) - mempool_destroy(conf->pool); + mempool_destroy(conf->pool); kfree(conf->multipaths); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 421a36c593e3..2e4c4cb79e4d 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -301,11 +301,16 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, { int s; uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); + unsigned total = nr_left + nr_center + nr_right; + unsigned target_right = total / 3; + unsigned remainder = (target_right * 3) != total; + unsigned target_left = target_right + remainder; + + BUG_ON(target_left > max_entries); + BUG_ON(target_right > max_entries); if (nr_left < nr_right) { - s = nr_left - target; + s = nr_left - target_left; if (s < 0 && nr_center < -s) { /* not enough in central node */ @@ -316,10 +321,10 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(left, center, s); - shift(center, right, target - nr_right); + shift(center, right, target_right - nr_right); } else { - s = target - nr_right; + s = target_right - nr_right; if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); @@ -329,7 +334,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(center, right, s); - shift(left, center, nr_left - target); + shift(left, center, nr_left - target_left); } *key_ptr(parent, c->index) = center->keys[0]; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index b6cec258cc21..0e09aef43998 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -523,7 +523,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) r = new_block(s->info, &right); if (r < 0) { - /* FIXME: put left */ + unlock_block(s->info, left); return r; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 63e619b2f44e..f8e5db0cb5aa 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -376,12 +376,6 @@ static int raid0_run(struct mddev *mddev) struct md_rdev *rdev; bool discard_supported = false; - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; - } blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); @@ -390,6 +384,12 @@ static int raid0_run(struct mddev *mddev) blk_queue_io_opt(mddev->queue, (mddev->chunk_sectors << 9) * mddev->raid_disks); + rdev_for_each(rdev, mddev) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; + } if (!discard_supported) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4517f06c41ba..d9d031ede4bf 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -881,8 +881,7 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) } if (bio && bio_data_dir(bio) == WRITE) { - if (bio->bi_iter.bi_sector >= - conf->mddev->curr_resync_completed) { + if (bio->bi_iter.bi_sector >= conf->next_resync) { if (conf->start_next_window == MaxSector) conf->start_next_window = conf->next_resync + @@ -1516,7 +1515,7 @@ static void close_sync(struct r1conf *conf) conf->r1buf_pool = NULL; spin_lock_irq(&conf->resync_lock); - conf->next_resync = 0; + conf->next_resync = MaxSector - 2 * NEXT_NORMALIO_DISTANCE; conf->start_next_window = MaxSector; conf->current_window_requests += conf->next_window_requests; @@ -2196,7 +2195,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) @@ -2259,15 +2258,16 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) rdev_dec_pending(conf->mirrors[m].rdev, conf->mddev); } - if (test_bit(R1BIO_WriteError, &r1_bio->state)) - close_write(r1_bio); if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r1_bio->retry_list, &conf->bio_end_io_list); spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); - } else + } else { + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + close_write(r1_bio); raid_end_bio_io(r1_bio); + } } static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) @@ -2383,9 +2383,13 @@ static void raid1d(struct md_thread *thread) } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { - r1_bio = list_first_entry(&conf->bio_end_io_list, - struct r1bio, retry_list); + r1_bio = list_first_entry(&tmp, struct r1bio, + retry_list); list_del(&r1_bio->retry_list); + if (mddev->degraded) + set_bit(R1BIO_Degraded, &r1_bio->state); + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + close_write(r1_bio); raid_end_bio_io(r1_bio); } } @@ -2843,8 +2847,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) abort: if (conf) { - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -2946,8 +2949,7 @@ static void raid1_free(struct mddev *mddev, void *priv) { struct r1conf *conf = priv; - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0fc33eb88855..96f365968306 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -39,6 +39,7 @@ * far_copies (stored in second byte of layout) * far_offset (stored in bit 16 of layout ) * use_far_sets (stored in bit 17 of layout ) + * use_far_sets_bugfixed (stored in bit 18 of layout ) * * The data to be stored is divided into chunks using chunksize. Each device * is divided into far_copies sections. In each section, chunks are laid out @@ -1497,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev) seq_printf(seq, " %d offset-copies", conf->geo.far_copies); else seq_printf(seq, " %d far-copies", conf->geo.far_copies); + if (conf->geo.far_set_size != conf->geo.raid_disks) + seq_printf(seq, " %d devices per set", conf->geo.far_set_size); } seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, conf->geo.raid_disks - mddev->degraded); @@ -2467,7 +2470,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) choose_data_offset(r10_bio, rdev) + (sector - r10_bio->sector)); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* Failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) @@ -2654,16 +2657,17 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } } - if (test_bit(R10BIO_WriteError, - &r10_bio->state)) - close_write(r10_bio); if (fail) { spin_lock_irq(&conf->device_lock); list_add(&r10_bio->retry_list, &conf->bio_end_io_list); spin_unlock_irq(&conf->device_lock); md_wakeup_thread(conf->mddev->thread); - } else + } else { + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); raid_end_bio_io(r10_bio); + } } } @@ -2688,9 +2692,15 @@ static void raid10d(struct md_thread *thread) } spin_unlock_irqrestore(&conf->device_lock, flags); while (!list_empty(&tmp)) { - r10_bio = list_first_entry(&conf->bio_end_io_list, - struct r10bio, retry_list); + r10_bio = list_first_entry(&tmp, struct r10bio, + retry_list); list_del(&r10_bio->retry_list); + if (mddev->degraded) + set_bit(R10BIO_Degraded, &r10_bio->state); + + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); raid_end_bio_io(r10_bio); } } @@ -3387,7 +3397,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) disks = mddev->raid_disks + mddev->delta_disks; break; } - if (layout >> 18) + if (layout >> 19) return -1; if (chunk < (PAGE_SIZE >> 9) || !is_power_of_2(chunk)) @@ -3399,7 +3409,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) geo->near_copies = nc; geo->far_copies = fc; geo->far_offset = fo; - geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; + switch (layout >> 17) { + case 0: /* original layout. simple but not always optimal */ + geo->far_set_size = disks; + break; + case 1: /* "improved" layout which was buggy. Hopefully no-one is + * actually using this, but leave code here just in case.*/ + geo->far_set_size = disks/fc; + WARN(geo->far_set_size < fc, + "This RAID10 layout does not provide data safety - please backup and create new array\n"); + break; + case 2: /* "improved" layout fixed to match documentation */ + geo->far_set_size = fc * nc; + break; + default: /* Not a valid layout */ + return -1; + } geo->chunk_mask = chunk - 1; geo->chunk_shift = ffz(~chunk); return nc*fc; @@ -3486,8 +3511,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n", mdname(mddev)); if (conf) { - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf); @@ -3682,8 +3706,7 @@ static int run(struct mddev *mddev) out_free_conf: md_unregister_thread(&mddev->thread); - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); @@ -3696,8 +3719,7 @@ static void raid10_free(struct mddev *mddev, void *priv) { struct r10conf *conf = priv; - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf->mirrors_old); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 15ef2c641b2b..45933c160697 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2271,8 +2271,7 @@ static void shrink_stripes(struct r5conf *conf) drop_one_stripe(conf)) ; - if (conf->slab_cache) - kmem_cache_destroy(conf->slab_cache); + kmem_cache_destroy(conf->slab_cache); conf->slab_cache = NULL; } @@ -3150,6 +3149,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, spin_unlock_irq(&sh->stripe_lock); if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) wake_up(&conf->wait_for_overlap); + if (bi) + s->to_read--; while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = @@ -3169,6 +3170,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, */ clear_bit(R5_LOCKED, &sh->dev[i].flags); } + s->to_write = 0; + s->written = 0; if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (atomic_dec_and_test(&conf->pending_full_writes)) @@ -3300,7 +3303,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, */ return 0; - for (i = 0; i < s->failed; i++) { + for (i = 0; i < s->failed && i < 2; i++) { if (fdev[i]->towrite && !test_bit(R5_UPTODATE, &fdev[i]->flags) && !test_bit(R5_OVERWRITE, &fdev[i]->flags)) @@ -3324,7 +3327,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, sh->sector < sh->raid_conf->mddev->recovery_cp) /* reconstruct-write isn't being forced */ return 0; - for (i = 0; i < s->failed; i++) { + for (i = 0; i < s->failed && i < 2; i++) { if (s->failed_num[i] != sh->pd_idx && s->failed_num[i] != sh->qd_idx && !test_bit(R5_UPTODATE, &fdev[i]->flags) && @@ -3496,6 +3499,7 @@ returnbi: } if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + int hash; clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); if (sh->qd_idx >= 0) { @@ -3509,16 +3513,17 @@ returnbi: * no updated data, so remove it from hash list and the stripe * will be reinitialized */ - spin_lock_irq(&conf->device_lock); unhash: + hash = sh->hash_lock_index; + spin_lock_irq(conf->hash_locks + hash); remove_hash(sh); + spin_unlock_irq(conf->hash_locks + hash); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, batch_list); if (sh != head_sh) goto unhash; } - spin_unlock_irq(&conf->device_lock); sh = head_sh; if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) |