diff options
33 files changed, 1373 insertions, 1315 deletions
diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt index f34d3236b9da..45f3b91ea4c3 100644 --- a/Documentation/device-mapper/striped.txt +++ b/Documentation/device-mapper/striped.txt @@ -9,15 +9,14 @@ devices in parallel. Parameters: <num devs> <chunk size> [<dev path> <offset>]+ <num devs>: Number of underlying devices. - <chunk size>: Size of each chunk of data. Must be a power-of-2 and at - least as large as the system's PAGE_SIZE. + <chunk size>: Size of each chunk of data. Must be at least as + large as the system's PAGE_SIZE. <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. <offset>: Starting sector within the device. One or more underlying devices can be specified. The striped device size must -be a multiple of the chunk size and a multiple of the number of underlying -devices. +be a multiple of the chunk size multiplied by the number of underlying devices. Example scripts diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index f5cfc62b7ad3..30b8b83bd333 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -231,6 +231,9 @@ i) Constructor no_discard_passdown: Don't pass discards down to the underlying data device, but just remove the mapping. + read_only: Don't allow any changes to be made to the pool + metadata. + Data block size must be between 64KB (128 sectors) and 1GB (2097152 sectors) inclusive. @@ -239,7 +242,7 @@ ii) Status <transaction id> <used metadata blocks>/<total metadata blocks> <used data blocks>/<total data blocks> <held metadata root> - + [no_]discard_passdown ro|rw transaction id: A 64-bit number used by userspace to help synchronise with metadata @@ -257,6 +260,21 @@ ii) Status held root. This feature is not yet implemented so '-' is always returned. + discard_passdown|no_discard_passdown + Whether or not discards are actually being passed down to the + underlying device. When this is enabled when loading the table, + it can get disabled if the underlying device doesn't support it. + + ro|rw + If the pool encounters certain types of device failures it will + drop into a read-only metadata mode in which no changes to + the pool metadata (like allocating new blocks) are permitted. + + In serious cases where even a read-only mode is deemed unsafe + no further I/O will be permitted and the status will just + contain the string 'Fail'. The userspace recovery tools + should then be used. + iii) Messages create_thin <dev id> @@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool. ii) Status <nr mapped sectors> <highest mapped sector> + + If the pool has encountered device errors and failed, the status + will just contain the string 'Fail'. The userspace recovery + tools should then be used. diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 10f122a3a856..1eee45b69b71 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -260,15 +260,6 @@ config DM_DEBUG_BLOCK_STACK_TRACING If unsure, say N. -config DM_DEBUG_SPACE_MAPS - boolean "Extra validation for thin provisioning space maps" - depends on DM_THIN_PROVISIONING - ---help--- - Enable this for messages that may help debug problems with the - space maps used by thin provisioning. - - If unsure, say N. - config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3f06df59fd82..664743d6a6cd 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -42,21 +42,21 @@ struct convert_context { unsigned int offset_out; unsigned int idx_in; unsigned int idx_out; - sector_t sector; - atomic_t pending; + sector_t cc_sector; + atomic_t cc_pending; }; /* * per bio private data */ struct dm_crypt_io { - struct dm_target *target; + struct crypt_config *cc; struct bio *base_bio; struct work_struct work; struct convert_context ctx; - atomic_t pending; + atomic_t io_pending; int error; sector_t sector; struct dm_crypt_io *base_io; @@ -109,9 +109,6 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; */ struct crypt_cpu { struct ablkcipher_request *req; - /* ESSIV: struct crypto_cipher *essiv_tfm */ - void *iv_private; - struct crypto_ablkcipher *tfms[0]; }; /* @@ -151,6 +148,10 @@ struct crypt_config { * per_cpu_ptr() only. */ struct crypt_cpu __percpu *cpu; + + /* ESSIV: struct crypto_cipher *essiv_tfm */ + void *iv_private; + struct crypto_ablkcipher **tfms; unsigned tfms_count; /* @@ -193,7 +194,7 @@ static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) */ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) { - return __this_cpu_ptr(cc->cpu)->tfms[0]; + return cc->tfms[0]; } /* @@ -258,7 +259,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) struct hash_desc desc; struct scatterlist sg; struct crypto_cipher *essiv_tfm; - int err, cpu; + int err; sg_init_one(&sg, cc->key, cc->key_size); desc.tfm = essiv->hash_tfm; @@ -268,14 +269,12 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) if (err) return err; - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, + essiv_tfm = cc->iv_private; - err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_hash_digestsize(essiv->hash_tfm)); - if (err) - return err; - } + err = crypto_cipher_setkey(essiv_tfm, essiv->salt, + crypto_hash_digestsize(essiv->hash_tfm)); + if (err) + return err; return 0; } @@ -286,16 +285,14 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; - int cpu, r, err = 0; + int r, err = 0; memset(essiv->salt, 0, salt_size); - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; - r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); - if (r) - err = r; - } + essiv_tfm = cc->iv_private; + r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); + if (r) + err = r; return err; } @@ -335,8 +332,6 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - int cpu; - struct crypt_cpu *cpu_cc; struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; @@ -346,15 +341,12 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) kzfree(essiv->salt); essiv->salt = NULL; - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - essiv_tfm = cpu_cc->iv_private; + essiv_tfm = cc->iv_private; - if (essiv_tfm) - crypto_free_cipher(essiv_tfm); + if (essiv_tfm) + crypto_free_cipher(essiv_tfm); - cpu_cc->iv_private = NULL; - } + cc->iv_private = NULL; } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, @@ -363,7 +355,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, struct crypto_cipher *essiv_tfm = NULL; struct crypto_hash *hash_tfm = NULL; u8 *salt = NULL; - int err, cpu; + int err; if (!opts) { ti->error = "Digest algorithm missing for ESSIV mode"; @@ -388,15 +380,13 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.salt = salt; cc->iv_gen_private.essiv.hash_tfm = hash_tfm; - for_each_possible_cpu(cpu) { - essiv_tfm = setup_essiv_cpu(cc, ti, salt, - crypto_hash_digestsize(hash_tfm)); - if (IS_ERR(essiv_tfm)) { - crypt_iv_essiv_dtr(cc); - return PTR_ERR(essiv_tfm); - } - per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; + essiv_tfm = setup_essiv_cpu(cc, ti, salt, + crypto_hash_digestsize(hash_tfm)); + if (IS_ERR(essiv_tfm)) { + crypt_iv_essiv_dtr(cc); + return PTR_ERR(essiv_tfm); } + cc->iv_private = essiv_tfm; return 0; @@ -410,7 +400,7 @@ bad: static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { - struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; + struct crypto_cipher *essiv_tfm = cc->iv_private; memset(iv, 0, cc->iv_size); *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); @@ -664,7 +654,7 @@ static void crypt_convert_init(struct crypt_config *cc, ctx->offset_out = 0; ctx->idx_in = bio_in ? bio_in->bi_idx : 0; ctx->idx_out = bio_out ? bio_out->bi_idx : 0; - ctx->sector = sector + cc->iv_offset; + ctx->cc_sector = sector + cc->iv_offset; init_completion(&ctx->restart); } @@ -695,12 +685,12 @@ static int crypt_convert_block(struct crypt_config *cc, struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); struct dm_crypt_request *dmreq; u8 *iv; - int r = 0; + int r; dmreq = dmreq_of_req(cc, req); iv = iv_of_dmreq(cc, dmreq); - dmreq->iv_sector = ctx->sector; + dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, @@ -749,12 +739,12 @@ static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { struct crypt_cpu *this_cc = this_crypt_config(cc); - unsigned key_index = ctx->sector & (cc->tfms_count - 1); + unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); if (!this_cc->req) this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); + ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); ablkcipher_request_set_callback(this_cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); @@ -769,14 +759,14 @@ static int crypt_convert(struct crypt_config *cc, struct crypt_cpu *this_cc = this_crypt_config(cc); int r; - atomic_set(&ctx->pending, 1); + atomic_set(&ctx->cc_pending, 1); while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { crypt_alloc_req(cc, ctx); - atomic_inc(&ctx->pending); + atomic_inc(&ctx->cc_pending); r = crypt_convert_block(cc, ctx, this_cc->req); @@ -788,19 +778,19 @@ static int crypt_convert(struct crypt_config *cc, /* fall through*/ case -EINPROGRESS: this_cc->req = NULL; - ctx->sector++; + ctx->cc_sector++; continue; /* sync */ case 0: - atomic_dec(&ctx->pending); - ctx->sector++; + atomic_dec(&ctx->cc_pending); + ctx->cc_sector++; cond_resched(); continue; /* error */ default: - atomic_dec(&ctx->pending); + atomic_dec(&ctx->cc_pending); return r; } } @@ -811,7 +801,7 @@ static int crypt_convert(struct crypt_config *cc, static void dm_crypt_bio_destructor(struct bio *bio) { struct dm_crypt_io *io = bio->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; bio_free(bio, cc->bs); } @@ -825,7 +815,7 @@ static void dm_crypt_bio_destructor(struct bio *bio) static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, unsigned *out_of_pages) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; @@ -884,26 +874,25 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } -static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, +static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, struct bio *bio, sector_t sector) { - struct crypt_config *cc = ti->private; struct dm_crypt_io *io; io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; + io->cc = cc; io->base_bio = bio; io->sector = sector; io->error = 0; io->base_io = NULL; - atomic_set(&io->pending, 0); + atomic_set(&io->io_pending, 0); return io; } static void crypt_inc_pending(struct dm_crypt_io *io) { - atomic_inc(&io->pending); + atomic_inc(&io->io_pending); } /* @@ -913,12 +902,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) */ static void crypt_dec_pending(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct dm_crypt_io *base_io = io->base_io; int error = io->error; - if (!atomic_dec_and_test(&io->pending)) + if (!atomic_dec_and_test(&io->io_pending)) return; mempool_free(io, cc->io_pool); @@ -952,7 +941,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) static void crypt_endio(struct bio *clone, int error) { struct dm_crypt_io *io = clone->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) @@ -979,7 +968,7 @@ static void crypt_endio(struct bio *clone, int error) static void clone_init(struct dm_crypt_io *io, struct bio *clone) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; clone->bi_private = io; clone->bi_end_io = crypt_endio; @@ -990,7 +979,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct bio *clone; @@ -1038,7 +1027,7 @@ static void kcryptd_io(struct work_struct *work) static void kcryptd_queue_io(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_io); queue_work(cc->io_queue, &io->work); @@ -1047,7 +1036,7 @@ static void kcryptd_queue_io(struct dm_crypt_io *io) static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1069,7 +1058,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; struct dm_crypt_io *new_io; int crypt_finished; @@ -1107,7 +1096,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - crypt_finished = atomic_dec_and_test(&io->ctx.pending); + crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); /* Encryption was already finished, submit io now */ if (crypt_finished) { @@ -1135,7 +1124,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * between fragments, so switch to a new dm_crypt_io structure. */ if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->target, io->base_bio, + new_io = crypt_io_alloc(io->cc, io->base_bio, sector); crypt_inc_pending(new_io); crypt_convert_init(cc, &new_io->ctx, NULL, @@ -1169,7 +1158,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; int r = 0; crypt_inc_pending(io); @@ -1181,7 +1170,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - if (atomic_dec_and_test(&io->ctx.pending)) + if (atomic_dec_and_test(&io->ctx.cc_pending)) kcryptd_crypt_read_done(io); crypt_dec_pending(io); @@ -1193,7 +1182,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, struct dm_crypt_request *dmreq = async_req->data; struct convert_context *ctx = dmreq->ctx; struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (error == -EINPROGRESS) { complete(&ctx->restart); @@ -1208,7 +1197,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); - if (!atomic_dec_and_test(&ctx->pending)) + if (!atomic_dec_and_test(&ctx->cc_pending)) return; if (bio_data_dir(io->base_bio) == READ) @@ -1229,7 +1218,7 @@ static void kcryptd_crypt(struct work_struct *work) static void kcryptd_queue_crypt(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_crypt); queue_work(cc->crypt_queue, &io->work); @@ -1241,7 +1230,6 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) static int crypt_decode_key(u8 *key, char *hex, unsigned int size) { char buffer[3]; - char *endp; unsigned int i; buffer[2] = '\0'; @@ -1250,9 +1238,7 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) buffer[0] = *hex++; buffer[1] = *hex++; - key[i] = (u8)simple_strtoul(buffer, &endp, 16); - - if (endp != &buffer[2]) + if (kstrtou8(buffer, 16, &key[i])) return -EINVAL; } @@ -1276,29 +1262,38 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) } } -static void crypt_free_tfms(struct crypt_config *cc, int cpu) +static void crypt_free_tfms(struct crypt_config *cc) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; + if (!cc->tfms) + return; + for (i = 0; i < cc->tfms_count; i++) - if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { - crypto_free_ablkcipher(cpu_cc->tfms[i]); - cpu_cc->tfms[i] = NULL; + if (cc->tfms[i] && !IS_ERR(cc->tfms[i])) { + crypto_free_ablkcipher(cc->tfms[i]); + cc->tfms[i] = NULL; } + + kfree(cc->tfms); + cc->tfms = NULL; } -static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) +static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; int err; + cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_ablkcipher *), + GFP_KERNEL); + if (!cc->tfms) + return -ENOMEM; + for (i = 0; i < cc->tfms_count; i++) { - cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); - if (IS_ERR(cpu_cc->tfms[i])) { - err = PTR_ERR(cpu_cc->tfms[i]); - crypt_free_tfms(cc, cpu); + cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); + if (IS_ERR(cc->tfms[i])) { + err = PTR_ERR(cc->tfms[i]); + crypt_free_tfms(cc); return err; } } @@ -1309,15 +1304,14 @@ static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) static int crypt_setkey_allcpus(struct crypt_config *cc) { unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); - int cpu, err = 0, i, r; - - for_each_possible_cpu(cpu) { - for (i = 0; i < cc->tfms_count; i++) { - r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], - cc->key + (i * subkey_size), subkey_size); - if (r) - err = r; - } + int err = 0, i, r; + + for (i = 0; i < cc->tfms_count; i++) { + r = crypto_ablkcipher_setkey(cc->tfms[i], + cc->key + (i * subkey_size), + subkey_size); + if (r) + err = r; } return err; @@ -1379,9 +1373,10 @@ static void crypt_dtr(struct dm_target *ti) cpu_cc = per_cpu_ptr(cc->cpu, cpu); if (cpu_cc->req) mempool_free(cpu_cc->req, cc->req_pool); - crypt_free_tfms(cc, cpu); } + crypt_free_tfms(cc); + if (cc->bs) bioset_free(cc->bs); @@ -1414,7 +1409,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, struct crypt_config *cc = ti->private; char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; char *cipher_api = NULL; - int cpu, ret = -EINVAL; + int ret = -EINVAL; char dummy; /* Convert to crypto api definition? */ @@ -1455,8 +1450,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + - cc->tfms_count * sizeof(*(cc->cpu->tfms)), + cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), __alignof__(struct crypt_cpu)); if (!cc->cpu) { ti->error = "Cannot allocate per cpu state"; @@ -1489,12 +1483,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Allocate cipher */ - for_each_possible_cpu(cpu) { - ret = crypt_alloc_tfms(cc, cpu, cipher_api); - if (ret < 0) { - ti->error = "Error allocating crypto tfm"; - goto bad; - } + ret = crypt_alloc_tfms(cc, cipher_api); + if (ret < 0) { + ti->error = "Error allocating crypto tfm"; + goto bad; } /* Initialize and set key */ @@ -1702,7 +1694,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; return 0; @@ -1715,7 +1707,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_crypt_io *io; - struct crypt_config *cc; + struct crypt_config *cc = ti->private; /* * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. @@ -1723,14 +1715,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, * - for REQ_DISCARD caller must use flush if IO ordering matters */ if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { - cc = ti->private; bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } - io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); + io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) @@ -1742,7 +1733,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, } static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; unsigned int sz = 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2dc22dddb2ae..f53846f9ab50 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, } static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index aa70f7d43a1a..ebaa4f803eec 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); static int set_chunk_size(struct dm_exception_store *store, const char *chunk_size_arg, char **error) { - unsigned long chunk_size_ulong; - char *value; + unsigned chunk_size; - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0' || - chunk_size_ulong > UINT_MAX) { + if (kstrtouint(chunk_size_arg, 10, &chunk_size)) { *error = "Invalid chunk size"; return -EINVAL; } - if (!chunk_size_ulong) { + if (!chunk_size) { store->chunk_size = store->chunk_mask = store->chunk_shift = 0; return 0; } - return dm_exception_store_set_chunk_size(store, - (unsigned) chunk_size_ulong, - error); + return dm_exception_store_set_chunk_size(store, chunk_size, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ac49c01f1a44..cc15543a6ad7 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a1a3e6df17b8..afd95986d099 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, char *outbuf, *outptr; status_type_t type; size_t remaining, len, used = 0; + unsigned status_flags = 0; outptr = outbuf = get_result_buffer(param, param_size, &len); @@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, /* Get the status/table string from the target driver */ if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { + if (param->flags & DM_NOFLUSH_FLAG) + status_flags |= DM_STATUS_NOFLUSH_FLAG; + if (ti->type->status(ti, type, status_flags, outptr, remaining)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3639eeab6042..1bf19a93eef0 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, } static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 638dae048b4f..d8abb90a6c2f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -85,6 +85,7 @@ struct multipath { unsigned queue_io:1; /* Must we queue all I/O? */ unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ + unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_count; /* Number of times pg_init called */ @@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; + struct request_queue *q = NULL; + const char *attached_handler_name; /* we need at least a path arg */ if (as->argc < 1) { @@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } - if (m->hw_handler_name) { - struct request_queue *q = bdev_get_queue(p->path.dev->bdev); + if (m->retain_attached_hw_handler || m->hw_handler_name) + q = bdev_get_queue(p->path.dev->bdev); + + if (m->retain_attached_hw_handler) { + attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); + if (attached_handler_name) { + /* + * Reset hw_handler_name to match the attached handler + * and clear any hw_handler_params associated with the + * ignored handler. + * + * NB. This modifies the table line to show the actual + * handler instead of the original table passed in. + */ + kfree(m->hw_handler_name); + m->hw_handler_name = attached_handler_name; + + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + } + if (m->hw_handler_name) { + /* + * Increments scsi_dh reference, even when using an + * already-attached handler. + */ r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { /* - * Already attached to different hw_handler, + * Already attached to different hw_handler: * try to reattach with correct one. */ scsi_dh_detach(q); @@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 5, "invalid number of feature args"}, + {0, 6, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { + m->retain_attached_hw_handler = 1; + continue; + } + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); @@ -1346,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type, else { DMEMIT("%u ", m->queue_if_no_path + (m->pg_init_retries > 0) * 2 + - (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + + m->retain_attached_hw_handler); if (m->queue_if_no_path) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) DMEMIT("pg_init_retries %u ", m->pg_init_retries); if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); + if (m->retain_attached_hw_handler) + DMEMIT("retain_attached_hw_handler "); } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1656,7 +1691,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 017c34d78d61..f2f29c526544 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -101,20 +101,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra { unsigned i; struct raid_set *rs; - sector_t sectors_per_dev; if (raid_devs <= raid_type->parity_devs) { ti->error = "Insufficient number of devices"; return ERR_PTR(-EINVAL); } - sectors_per_dev = ti->len; - if ((raid_type->level > 1) && - sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { - ti->error = "Target length not divisible by number of data devices"; - return ERR_PTR(-EINVAL); - } - rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); if (!rs) { ti->error = "Cannot allocate raid context"; @@ -128,7 +120,6 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra rs->md.raid_disks = raid_devs; rs->md.level = raid_type->level; rs->md.new_level = rs->md.level; - rs->md.dev_sectors = sectors_per_dev; rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; @@ -143,6 +134,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra * rs->md.external * rs->md.chunk_sectors * rs->md.new_chunk_sectors + * rs->md.dev_sectors */ return rs; @@ -353,6 +345,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, { unsigned i, rebuild_cnt = 0; unsigned long value, region_size = 0; + sector_t sectors_per_dev = rs->ti->len; + sector_t max_io_len; char *key; /* @@ -429,13 +423,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, if (!strcasecmp(key, "rebuild")) { rebuild_cnt++; - if (((rs->raid_type->level != 1) && - (rebuild_cnt > rs->raid_type->parity_devs)) || - ((rs->raid_type->level == 1) && - (rebuild_cnt > (rs->md.raid_disks - 1)))) { - rs->ti->error = "Too many rebuild devices specified for given RAID type"; + + switch (rs->raid_type->level) { + case 1: + if (rebuild_cnt >= rs->md.raid_disks) { + rs->ti->error = "Too many rebuild devices specified"; + return -EINVAL; + } + break; + case 4: + case 5: + case 6: + if (rebuild_cnt > rs->raid_type->parity_devs) { + rs->ti->error = "Too many rebuild devices specified for given RAID type"; + return -EINVAL; + } + break; + default: + DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); + rs->ti->error = "Rebuild not supported for this RAID type"; return -EINVAL; } + if (value > rs->md.raid_disks) { rs->ti->error = "Invalid rebuild index given"; return -EINVAL; @@ -522,14 +531,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; + max_io_len = rs->md.chunk_sectors; else - rs->ti->split_io = region_size; + max_io_len = region_size; - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; + if (dm_set_target_max_io_len(rs->ti, max_io_len)) + return -EINVAL; + + if ((rs->raid_type->level > 1) && + sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { + rs->ti->error = "Target length not divisible by number of data devices"; + return -EINVAL; + } + rs->md.dev_sectors = sectors_per_dev; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; @@ -1067,7 +1081,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c } static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b58b7a33914a..bc5ddba8045b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1081,10 +1081,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; - ti->split_io = dm_rh_get_region_size(ms->rh); + + r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); + if (r) + goto err_free_context; + ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1363,7 +1367,7 @@ static char device_status_char(struct mirror *m) static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6f758870fc19..a143921feaf6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. */ -static sector_t __minimum_chunk_size(struct origin *o) +static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; unsigned chunk_size = 0; @@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o) chunk_size = min_not_zero(chunk_size, snap->store->chunk_size); - return chunk_size; + return (uint32_t) chunk_size; } /* @@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Chunk size not set"; goto bad_read_metadata; } - ti->split_io = s->store->chunk_size; + + r = dm_set_target_max_io_len(ti, s->store->chunk_size); + if (r) + goto bad_read_metadata; return 0; @@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, snap_dest->store->snap = snap_dest; snap_src->store->snap = snap_src; - snap_dest->ti->split_io = snap_dest->store->chunk_size; + snap_dest->ti->max_io_len = snap_dest->store->chunk_size; snap_dest->valid = snap_src->valid; /* @@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti) up_write(&s->lock); } -static sector_t get_origin_minimum_chunksize(struct block_device *bdev) +static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) { - sector_t min_chunksize; + uint32_t min_chunksize; down_read(&_origins_lock); min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); @@ -1838,15 +1841,15 @@ static void snapshot_merge_resume(struct dm_target *ti) snapshot_resume(ti); /* - * snapshot-merge acts as an origin, so set ti->split_io + * snapshot-merge acts as an origin, so set ti->max_io_len */ - ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); + ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); start_merge(s); } static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, struct origin *o; /* - * The origin's __minimum_chunk_size() got stored in split_io + * The origin's __minimum_chunk_size() got stored in max_io_len * by snapshot_merge_resume(). */ down_read(&_origins_lock); o = __lookup_origin(merging_snap->origin->bdev); - for (n = 0; n < size; n += merging_snap->ti->split_io) + for (n = 0; n < size; n += merging_snap->ti->max_io_len) if (__origin_write(&o->snapshots, sector + n, NULL) == DM_MAPIO_SUBMITTED) must_wait = 1; @@ -2138,18 +2141,18 @@ static int origin_map(struct dm_target *ti, struct bio *bio, } /* - * Set the target "split_io" field to the minimum of all the snapshots' + * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. */ static void origin_resume(struct dm_target *ti) { struct dm_dev *dev = ti->private; - ti->split_io = get_origin_minimum_chunksize(dev->bdev); + ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2176,7 +2179,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return max_size; bvm->bi_bdev = dev->bdev; - bvm->bi_sector = bvm->bi_sector; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 35c94ff24ad5..a087bf2a8d66 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -26,14 +26,12 @@ struct stripe { struct stripe_c { uint32_t stripes; int stripes_shift; - sector_t stripes_mask; /* The size of this target / num. stripes */ sector_t stripe_width; - /* stripe chunk size */ - uint32_t chunk_shift; - sector_t chunk_mask; + uint32_t chunk_size; + int chunk_size_shift; /* Needed for handling events */ struct dm_target *ti; @@ -91,7 +89,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, /* * Construct a striped mapping. - * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ + * <number of stripes> <chunk size> [<dev_path> <offset>]+ */ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -99,7 +97,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) sector_t width; uint32_t stripes; uint32_t chunk_size; - char *end; int r; unsigned int i; @@ -108,34 +105,23 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - stripes = simple_strtoul(argv[0], &end, 10); - if (!stripes || *end) { + if (kstrtouint(argv[0], 10, &stripes) || !stripes) { ti->error = "Invalid stripe count"; return -EINVAL; } - chunk_size = simple_strtoul(argv[1], &end, 10); - if (*end) { + if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) { ti->error = "Invalid chunk_size"; return -EINVAL; } - /* - * chunk_size is a power of two - */ - if (!is_power_of_2(chunk_size) || - (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { - ti->error = "Invalid chunk size"; - return -EINVAL; - } - - if (ti->len & (chunk_size - 1)) { + width = ti->len; + if (sector_div(width, chunk_size)) { ti->error = "Target length not divisible by " "chunk size"; return -EINVAL; } - width = ti->len; if (sector_div(width, stripes)) { ti->error = "Target length not divisible by " "number of stripes"; @@ -167,17 +153,21 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (stripes & (stripes - 1)) sc->stripes_shift = -1; - else { - sc->stripes_shift = ffs(stripes) - 1; - sc->stripes_mask = ((sector_t) stripes) - 1; - } + else + sc->stripes_shift = __ffs(stripes); + + r = dm_set_target_max_io_len(ti, chunk_size); + if (r) + return r; - ti->split_io = chunk_size; ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; - sc->chunk_shift = ffs(chunk_size) - 1; - sc->chunk_mask = ((sector_t) chunk_size) - 1; + sc->chunk_size = chunk_size; + if (chunk_size & (chunk_size - 1)) + sc->chunk_size_shift = -1; + else + sc->chunk_size_shift = __ffs(chunk_size); /* * Get the stripe destinations. @@ -216,17 +206,29 @@ static void stripe_dtr(struct dm_target *ti) static void stripe_map_sector(struct stripe_c *sc, sector_t sector, uint32_t *stripe, sector_t *result) { - sector_t offset = dm_target_offset(sc->ti, sector); - sector_t chunk = offset >> sc->chunk_shift; + sector_t chunk = dm_target_offset(sc->ti, sector); + sector_t chunk_offset; + + if (sc->chunk_size_shift < 0) + chunk_offset = sector_div(chunk, sc->chunk_size); + else { + chunk_offset = chunk & (sc->chunk_size - 1); + chunk >>= sc->chunk_size_shift; + } if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); else { - *stripe = chunk & sc->stripes_mask; + *stripe = chunk & (sc->stripes - 1); chunk >>= sc->stripes_shift; } - *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); + if (sc->chunk_size_shift < 0) + chunk *= sc->chunk_size; + else + chunk <<= sc->chunk_size_shift; + + *result = chunk + chunk_offset; } static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, @@ -237,9 +239,16 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, stripe_map_sector(sc, sector, &stripe, result); if (stripe == target_stripe) return; - *result &= ~sc->chunk_mask; /* round down */ + + /* round down */ + sector = *result; + if (sc->chunk_size_shift < 0) + *result -= sector_div(sector, sc->chunk_size); + else + *result = sector & ~(sector_t)(sc->chunk_size - 1); + if (target_stripe < stripe) - *result += sc->chunk_mask + 1; /* next chunk */ + *result += sc->chunk_size; /* next chunk */ } static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, @@ -302,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, * */ -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) +static int stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -324,7 +333,7 @@ static int stripe_status(struct dm_target *ti, case STATUSTYPE_TABLE: DMEMIT("%d %llu", sc->stripes, - (unsigned long long)sc->chunk_mask + 1); + (unsigned long long)sc->chunk_size); for (i = 0; i < sc->stripes; i++) DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); @@ -391,7 +400,7 @@ static void stripe_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct stripe_c *sc = ti->private; - unsigned chunk_size = (sc->chunk_mask + 1) << 9; + unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT; blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * sc->stripes); @@ -419,7 +428,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2e227fbf1622..f90069029aae 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1319,6 +1319,9 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) if (!ti->num_flush_requests) continue; + if (ti->flush_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_flush_capable, &flush)) return 1; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 3e2907f0bc46..693e149e9727 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Red Hat, Inc. + * Copyright (C) 2011-2012 Red Hat, Inc. * * This file is released under the GPL. */ @@ -80,6 +80,12 @@ #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 +/* + * 3 for btree insert + + * 2 for btree lookup used within space map + */ +#define THIN_MAX_CONCURRENT_LOCKS 5 + /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 @@ -172,13 +178,20 @@ struct dm_pool_metadata { struct rw_semaphore root_lock; uint32_t time; - int need_commit; dm_block_t root; dm_block_t details_root; struct list_head thin_devices; uint64_t trans_id; unsigned long flags; sector_t data_block_size; + bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. + */ + bool fail_io:1; }; struct dm_thin_device { @@ -187,7 +200,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - int changed; + bool changed:1; + bool aborted_with_changes:1; uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -338,7 +352,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -365,72 +393,9 @@ static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) return dm_bm_unlock(b); } -static int init_pmd(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) +static void __setup_btree_details(struct dm_pool_metadata *pmd) { - int r; - struct dm_space_map *sm, *data_sm; - struct dm_transaction_manager *tm; - struct dm_block *sblock; - - if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_create_with_sm failed"); - return r; - } - - data_sm = dm_sm_disk_create(tm, nr_blocks); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_create failed"); - dm_tm_unlock(tm, sblock); - r = PTR_ERR(data_sm); - goto bad; - } - } else { - struct thin_disk_superblock *disk_super = NULL; - size_t space_map_root_offset = - offsetof(struct thin_disk_superblock, metadata_space_map_root); - - r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, space_map_root_offset, - SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_open_with_sm failed"); - return r; - } - - disk_super = dm_block_data(sblock); - data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_open failed"); - r = PTR_ERR(data_sm); - goto bad; - } - } - - - r = dm_tm_unlock(tm, sblock); - if (r < 0) { - DMERR("couldn't unlock superblock"); - goto bad_data_sm; - } - - pmd->bm = bm; - pmd->metadata_sm = sm; - pmd->data_sm = data_sm; - pmd->tm = tm; - pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); - if (!pmd->nb_tm) { - DMERR("could not create clone tm"); - r = -ENOMEM; - goto bad_data_sm; - } - - pmd->info.tm = tm; + pmd->info.tm = pmd->tm; pmd->info.levels = 2; pmd->info.value_type.context = pmd->data_sm; pmd->info.value_type.size = sizeof(__le64); @@ -441,7 +406,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); pmd->nb_info.tm = pmd->nb_tm; - pmd->tl_info.tm = tm; + pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; pmd->tl_info.value_type.context = &pmd->info; pmd->tl_info.value_type.size = sizeof(__le64); @@ -449,7 +414,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, pmd->tl_info.value_type.dec = subtree_dec; pmd->tl_info.value_type.equal = subtree_equal; - pmd->bl_info.tm = tm; + pmd->bl_info.tm = pmd->tm; pmd->bl_info.levels = 1; pmd->bl_info.value_type.context = pmd->data_sm; pmd->bl_info.value_type.size = sizeof(__le64); @@ -457,48 +422,266 @@ static int init_pmd(struct dm_pool_metadata *pmd, pmd->bl_info.value_type.dec = data_block_dec; pmd->bl_info.value_type.equal = data_block_equal; - pmd->details_info.tm = tm; + pmd->details_info.tm = pmd->tm; pmd->details_info.levels = 1; pmd->details_info.value_type.context = NULL; pmd->details_info.value_type.size = sizeof(struct disk_device_details); pmd->details_info.value_type.inc = NULL; pmd->details_info.value_type.dec = NULL; pmd->details_info.value_type.equal = NULL; +} - pmd->root = 0; +static int __write_initial_superblock(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + size_t metadata_len, data_len; + struct thin_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; - init_rwsem(&pmd->root_lock); - pmd->time = 0; - pmd->need_commit = 0; - pmd->details_root = 0; - pmd->trans_id = 0; - pmd->flags = 0; - INIT_LIST_HEAD(&pmd->thin_devices); + if (bdev_size > THIN_METADATA_MAX_SECTORS) + bdev_size = THIN_METADATA_MAX_SECTORS; + + r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = dm_sm_root_size(pmd->data_sm, &data_len); + if (r < 0) + return r; + + r = dm_sm_commit(pmd->data_sm); + if (r < 0) + return r; + + r = dm_tm_pre_commit(pmd->tm); + if (r < 0) + return r; + + r = superblock_lock_zero(pmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); + disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(THIN_VERSION); + disk_super->time = 0; + disk_super->trans_id = 0; + disk_super->held_root = 0; + + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) + goto bad_locked; + + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, + data_len); + if (r < 0) + goto bad_locked; + + disk_super->data_mapping_root = cpu_to_le64(pmd->root); + disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); + disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); + + return dm_tm_commit(pmd->tm, sblock); + +bad_locked: + dm_bm_unlock(sblock); + return r; +} + +static int __format_metadata(struct dm_pool_metadata *pmd) +{ + int r; + + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } + + pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_create failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + + r = dm_btree_empty(&pmd->info, &pmd->root); + if (r < 0) + goto bad_cleanup_nb_tm; + + r = dm_btree_empty(&pmd->details_info, &pmd->details_root); + if (r < 0) { + DMERR("couldn't create devices root"); + goto bad_cleanup_nb_tm; + } + + r = __write_initial_superblock(pmd); + if (r) + goto bad_cleanup_nb_tm; return 0; -bad_data_sm: - dm_sm_destroy(data_sm); -bad: - dm_tm_destroy(tm); - dm_sm_destroy(sm); +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); + + return r; +} + +static int __check_incompat_features(struct thin_disk_superblock *disk_super, + struct dm_pool_metadata *pmd) +{ + uint32_t features; + + features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; + if (features) { + DMERR("could not access metadata due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + /* + * Check for read-only metadata to skip the following RDWR checks. + */ + if (get_disk_ro(pmd->bdev->bd_disk)) + return 0; + + features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; + if (features) { + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + return 0; +} + +static int __open_metadata(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't read superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + + r = __check_incompat_features(disk_super, pmd); + if (r < 0) + goto bad_unlock_sblock; + + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + goto bad_unlock_sblock; + } + + pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, + sizeof(disk_super->data_space_map_root)); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_open failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + return dm_bm_unlock(sblock); + +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: + dm_bm_unlock(sblock); + + return r; +} + +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) +{ + int r, unformatted; + + r = __superblock_all_zeroes(pmd->bm, &unformatted); + if (r) + return r; + + if (unformatted) + return format_device ? __format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); +} + +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) +{ + int r; + + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, + THIN_METADATA_CACHE_SIZE, + THIN_MAX_CONCURRENT_LOCKS); + if (IS_ERR(pmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(pmd->bm); + } + + r = __open_or_format_metadata(pmd, format_device); + if (r) + dm_block_manager_destroy(pmd->bm); return r; } +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; - u32 features; struct thin_disk_superblock *disk_super; struct dm_block *sblock; /* - * __maybe_commit_transaction() resets these - */ - WARN_ON(pmd->need_commit); - - /* * We re-read the superblock every time. Shouldn't need to do this * really. */ @@ -515,32 +698,8 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) pmd->flags = le32_to_cpu(disk_super->flags); pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); - features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; - if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - goto out; - } - - /* - * Check for read-only metadata to skip the following RDWR checks. - */ - if (get_disk_ro(pmd->bdev->bd_disk)) - goto out; - - features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; - if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - } - -out: dm_bm_unlock(sblock); - return r; + return 0; } static int __write_changed_details(struct dm_pool_metadata *pmd) @@ -573,8 +732,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) list_del(&td->list); kfree(td); } - - pmd->need_commit = 1; } return 0; @@ -582,9 +739,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) static int __commit_transaction(struct dm_pool_metadata *pmd) { - /* - * FIXME: Associated pool should be made read-only on failure. - */ int r; size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; @@ -597,31 +751,27 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) r = __write_changed_details(pmd); if (r < 0) - goto out; - - if (!pmd->need_commit) - goto out; + return r; r = dm_sm_commit(pmd->data_sm); if (r < 0) - goto out; + return r; r = dm_tm_pre_commit(pmd->tm); if (r < 0) - goto out; + return r; r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); if (r < 0) - goto out; + return r; r = dm_sm_root_size(pmd->data_sm, &data_len); if (r < 0) - goto out; + return r; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) - goto out; + return r; disk_super = dm_block_data(sblock); disk_super->time = cpu_to_le32(pmd->time); @@ -640,12 +790,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) goto out_locked; - r = dm_tm_commit(pmd->tm, sblock); - if (!r) - pmd->need_commit = 0; - -out: - return r; + return dm_tm_commit(pmd->tm, sblock); out_locked: dm_bm_unlock(sblock); @@ -653,15 +798,11 @@ out_locked: } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size) + sector_t data_block_size, + bool format_device) { int r; - struct thin_disk_superblock *disk_super; struct dm_pool_metadata *pmd; - sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - struct dm_block_manager *bm; - int create; - struct dm_block *sblock; pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); if (!pmd) { @@ -669,90 +810,28 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } - /* - * Max hex locks: - * 3 for btree insert + - * 2 for btree lookup used within space map - */ - bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, 5); - if (!bm) { - DMERR("could not create block manager"); - kfree(pmd); - return ERR_PTR(-ENOMEM); - } - - r = superblock_all_zeroes(bm, &create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } - + init_rwsem(&pmd->root_lock); + pmd->time = 0; + INIT_LIST_HEAD(&pmd->thin_devices); + pmd->read_only = false; + pmd->fail_io = false; + pmd->bdev = bdev; + pmd->data_block_size = data_block_size; - r = init_pmd(pmd, bm, 0, create); + r = __create_persistent_data_objects(pmd, format_device); if (r) { - dm_block_manager_destroy(bm); kfree(pmd); return ERR_PTR(r); } - pmd->bdev = bdev; - - if (!create) { - r = __begin_transaction(pmd); - if (r < 0) - goto bad; - return pmd; - } - - /* - * Create. - */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - goto bad; - - if (bdev_size > THIN_METADATA_MAX_SECTORS) - bdev_size = THIN_METADATA_MAX_SECTORS; - - disk_super = dm_block_data(sblock); - disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(THIN_VERSION); - disk_super->time = 0; - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); - disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); - disk_super->data_block_size = cpu_to_le32(data_block_size); - - r = dm_bm_unlock(sblock); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->info, &pmd->root); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->details_info, &pmd->details_root); - if (r < 0) { - DMERR("couldn't create devices root"); - goto bad; - } - pmd->flags = 0; - pmd->need_commit = 1; - r = dm_pool_commit_metadata(pmd); + r = __begin_transaction(pmd); if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - goto bad; + if (dm_pool_metadata_close(pmd) < 0) + DMWARN("%s: dm_pool_metadata_close() failed.", __func__); + return ERR_PTR(r); } return pmd; - -bad: - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); } int dm_pool_metadata_close(struct dm_pool_metadata *pmd) @@ -778,18 +857,17 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } - r = __commit_transaction(pmd); - if (r < 0) - DMWARN("%s: __commit_transaction() failed, error = %d", - __func__, r); + if (!pmd->read_only && !pmd->fail_io) { + r = __commit_transaction(pmd); + if (r < 0) + DMWARN("%s: __commit_transaction() failed, error = %d", + __func__, r); + } - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); - dm_sm_destroy(pmd->metadata_sm); - dm_sm_destroy(pmd->data_sm); - kfree(pmd); + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -850,6 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; + (*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -911,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1001,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1037,18 +1118,17 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) if (r) return r; - pmd->need_commit = 1; - return 0; } int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1058,28 +1138,40 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { + int r = -EINVAL; + down_write(&pmd->root_lock); + + if (pmd->fail_io) + goto out; + if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); DMERR("mismatched transaction id"); - return -EINVAL; + goto out; } pmd->trans_id = new_id; - pmd->need_commit = 1; + r = 0; + +out: up_write(&pmd->root_lock); - return 0; + return r; } int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { + int r = -EINVAL; + down_read(&pmd->root_lock); - *result = pmd->trans_id; + if (!pmd->fail_io) { + *result = pmd->trans_id; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) @@ -1108,8 +1200,6 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) dm_tm_dec(pmd->tm, held_root); dm_tm_unlock(pmd->tm, copy); - pmd->need_commit = 1; - return -EBUSY; } @@ -1131,29 +1221,25 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) /* * Write the held root into the superblock. */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) { dm_tm_dec(pmd->tm, held_root); - pmd->need_commit = 1; return r; } disk_super = dm_block_data(sblock); disk_super->held_root = cpu_to_le64(held_root); dm_bm_unlock(sblock); - - pmd->need_commit = 1; - return 0; } int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __reserve_metadata_snap(pmd); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1166,15 +1252,13 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) struct dm_block *sblock, *copy; dm_block_t held_root; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) return r; disk_super = dm_block_data(sblock); held_root = le64_to_cpu(disk_super->held_root); disk_super->held_root = cpu_to_le64(0); - pmd->need_commit = 1; dm_bm_unlock(sblock); @@ -1197,10 +1281,11 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __release_metadata_snap(pmd); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1227,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = __get_metadata_snap(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1239,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1262,7 +1349,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) +static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; } @@ -1270,28 +1357,31 @@ static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1312,7 +1402,6 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - pmd->need_commit = 1; value = cpu_to_le64(pack_block_time(data_block, pmd->time)); __dm_bless_for_disk(&value); @@ -1321,10 +1410,9 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; - if (inserted) { + td->changed = 1; + if (inserted) td->mapped_blocks++; - td->changed = 1; - } return 0; } @@ -1332,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1353,31 +1442,51 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) td->mapped_blocks--; td->changed = 1; - pmd->need_commit = 1; return 0; } int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __remove(td, block); + if (!td->pmd->fail_io) + r = __remove(td, block); up_write(&td->pmd->root_lock); return r; } -int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; - down_write(&pmd->root_lock); + down_read(&td->pmd->root_lock); + r = td->changed; + up_read(&td->pmd->root_lock); - r = dm_sm_new_block(pmd->data_sm, result); - pmd->need_commit = 1; + return r; +} + +bool dm_thin_aborted_changes(struct dm_thin_device *td) +{ + bool r; + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); + + return r; +} + +int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1385,9 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; r = __commit_transaction(pmd); if (r <= 0) @@ -1402,12 +1513,41 @@ out: return r; } +static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) +{ + struct dm_thin_device *td; + + list_for_each_entry(td, &pmd->thin_devices, list) + td->aborted_with_changes = td->changed; +} + +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; + + __set_abort_with_changes_flags(pmd); + __destroy_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, false); + if (r) + pmd->fail_io = true; + +out: + up_write(&pmd->root_lock); + + return r; +} + int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1416,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1428,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1448,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1459,13 +1602,17 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - *result = td->mapped_blocks; + if (!pmd->fail_io) { + *result = td->mapped_blocks; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __highest_block(struct dm_thin_device *td, dm_block_t *result) @@ -1487,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1514,20 +1662,25 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return -EINVAL; } - r = dm_sm_extend(pmd->data_sm, new_count - old_count); - if (!r) - pmd->need_commit = 1; - - return r; + return dm_sm_extend(pmd->data_sm, new_count - old_count); } int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); + if (!pmd->fail_io) + r = __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; } + +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); + up_write(&pmd->root_lock); +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index b88918ccdaf6..0cecc3702885 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -38,7 +38,8 @@ typedef uint64_t dm_thin_id; * Reopens or creates a new, empty metadata volume. */ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size); + sector_t data_block_size, + bool format_device); int dm_pool_metadata_close(struct dm_pool_metadata *pmd); @@ -79,6 +80,16 @@ int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); /* + * Discards all uncommitted changes. Rereads the superblock, rolling back + * to the last good transaction. Thin devices remain open. + * dm_thin_aborted_changes() tells you if they had uncommitted changes. + * + * If this call fails it's only useful to call dm_pool_metadata_close(). + * All other methods will fail with -EINVAL. + */ +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); + +/* * Set/get userspace transaction id. */ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, @@ -119,7 +130,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td); struct dm_thin_lookup_result { dm_block_t block; - int shared; + unsigned shared:1; }; /* @@ -147,6 +158,10 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); /* * Queries. */ +bool dm_thin_changed_this_transaction(struct dm_thin_device *td); + +bool dm_thin_aborted_changes(struct dm_thin_device *td); + int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *highest_mapped); @@ -171,6 +186,12 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); */ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); +/* + * Flicks the underlying block manager into read only mode, so you know + * that nothing is changing. + */ +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); + /*----------------------------------------------------------------*/ #endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 68694da0d21d..af1fc3b2c2ad 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1,10 +1,11 @@ /* - * Copyright (C) 2011 Red Hat UK. + * Copyright (C) 2011-2012 Red Hat UK. * * This file is released under the GPL. */ #include "dm-thin-metadata.h" +#include "dm.h" #include <linux/device-mapper.h> #include <linux/dm-io.h> @@ -19,7 +20,7 @@ /* * Tunable constants */ -#define ENDIO_HOOK_POOL_SIZE 10240 +#define ENDIO_HOOK_POOL_SIZE 1024 #define DEFERRED_SET_SIZE 64 #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 @@ -496,12 +497,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, */ struct dm_thin_new_mapping; +/* + * The pool runs in 3 modes. Ordered in degraded order for comparisons. + */ +enum pool_mode { + PM_WRITE, /* metadata may be changed */ + PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ +}; + struct pool_features { + enum pool_mode mode; + unsigned zero_new_blocks:1; unsigned discard_enabled:1; unsigned discard_passdown:1; }; +struct thin_c; +typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); +typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); + struct pool { struct list_head list; struct dm_target *ti; /* Only set if a pool target is bound */ @@ -510,10 +526,9 @@ struct pool { struct block_device *md_dev; struct dm_pool_metadata *pmd; - uint32_t sectors_per_block; - unsigned block_shift; - dm_block_t offset_mask; dm_block_t low_water_blocks; + uint32_t sectors_per_block; + int sectors_per_block_shift; struct pool_features pf; unsigned low_water_triggered:1; /* A dm event has been sent */ @@ -526,8 +541,8 @@ struct pool { struct work_struct worker; struct delayed_work waker; - unsigned ref_count; unsigned long last_commit_jiffies; + unsigned ref_count; spinlock_t lock; struct bio_list deferred_bios; @@ -543,8 +558,17 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; mempool_t *endio_hook_pool; + + process_bio_fn process_bio; + process_bio_fn process_discard; + + process_mapping_fn process_prepared_mapping; + process_mapping_fn process_prepared_discard; }; +static enum pool_mode get_pool_mode(struct pool *pool); +static void set_pool_mode(struct pool *pool, enum pool_mode mode); + /* * Target context for a pool. */ @@ -679,16 +703,28 @@ static void requeue_io(struct thin_c *tc) static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { - return bio->bi_sector >> tc->pool->block_shift; + sector_t block_nr = bio->bi_sector; + + if (tc->pool->sectors_per_block_shift < 0) + (void) sector_div(block_nr, tc->pool->sectors_per_block); + else + block_nr >>= tc->pool->sectors_per_block_shift; + + return block_nr; } static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) { struct pool *pool = tc->pool; + sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block << pool->block_shift) + - (bio->bi_sector & pool->offset_mask); + if (tc->pool->sectors_per_block_shift < 0) + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); + else + bio->bi_sector = (block << pool->sectors_per_block_shift) | + (bi_sector & (pool->sectors_per_block - 1)); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -696,21 +732,39 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) bio->bi_bdev = tc->origin_dev->bdev; } +static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) +{ + return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + dm_thin_changed_this_transaction(tc->td); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; unsigned long flags; + if (!bio_triggers_commit(tc, bio)) { + generic_make_request(bio); + return; + } + /* - * Batch together any FUA/FLUSH bios we find and then issue - * a single commit for them in process_deferred_bios(). + * Complete bio with an error if earlier I/O caused changes to + * the metadata that can't be committed e.g, due to I/O errors + * on the metadata device. */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } else - generic_make_request(bio); + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); } static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) @@ -847,6 +901,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell wake_worker(pool); } +static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) +{ + if (m->bio) + m->bio->bi_end_io = m->saved_bi_end_io; + cell_error(m->cell); + list_del(&m->list); + mempool_free(m, m->tc->pool->mapping_pool); +} static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -859,7 +921,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (m->err) { cell_error(m->cell); - return; + goto out; } /* @@ -871,7 +933,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (r) { DMERR("dm_thin_insert_block() failed"); cell_error(m->cell); - return; + goto out; } /* @@ -886,22 +948,25 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) } else cell_defer(tc, m->cell, m->data_block); +out: list_del(&m->list); mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_discard(struct dm_thin_new_mapping *m) +static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) { - int r; struct thin_c *tc = m->tc; - r = dm_thin_remove_block(tc->td, m->virt_block); - if (r) - DMERR("dm_thin_remove_block() failed"); + bio_io_error(m->bio); + cell_defer_except(tc, m->cell); + cell_defer_except(tc, m->cell2); + mempool_free(m, tc->pool->mapping_pool); +} + +static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) +{ + struct thin_c *tc = m->tc; - /* - * Pass the discard down to the underlying device? - */ if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else @@ -912,8 +977,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) mempool_free(m, tc->pool->mapping_pool); } +static void process_prepared_discard(struct dm_thin_new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + + r = dm_thin_remove_block(tc->td, m->virt_block); + if (r) + DMERR("dm_thin_remove_block() failed"); + + process_prepared_discard_passdown(m); +} + static void process_prepared(struct pool *pool, struct list_head *head, - void (*fn)(struct dm_thin_new_mapping *)) + process_mapping_fn *fn) { unsigned long flags; struct list_head maps; @@ -925,7 +1002,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, spin_unlock_irqrestore(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &maps, list) - fn(m); + (*fn)(m); } /* @@ -933,9 +1010,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, */ static int io_overlaps_block(struct pool *pool, struct bio *bio) { - return !(bio->bi_sector & pool->offset_mask) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); - + return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); } static int io_overwrites_block(struct pool *pool, struct bio *bio) @@ -1093,6 +1168,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, } } +static int commit(struct pool *pool) +{ + int r; + + r = dm_pool_commit_metadata(pool->pmd); + if (r) + DMERR("commit failed, error = %d", r); + + return r; +} + +/* + * A non-zero return indicates read_only or fail_io mode. + * Many callers don't care about the return value. + */ +static int commit_or_fallback(struct pool *pool) +{ + int r; + + if (get_pool_mode(pool) != PM_WRITE) + return -EINVAL; + + r = commit(pool); + if (r) + set_pool_mode(pool, PM_READ_ONLY); + + return r; +} + static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -1121,12 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -1218,7 +1317,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ m = get_next_mapping(pool); m->tc = tc; - m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown; + m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; m->virt_block = block; m->data_block = lookup_result.block; m->cell = cell; @@ -1234,15 +1333,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) } } else { /* - * This path is hit if people are ignoring - * limits->discard_granularity. It ignores any - * part of the discard that is in a subsequent - * block. + * The DM core makes sure that the discard doesn't span + * a block boundary. So we submit the discard of a + * partial block appropriately. */ - sector_t offset = bio->bi_sector - (block << pool->block_shift); - unsigned remaining = (pool->sectors_per_block - offset) << 9; - bio->bi_size = min(bio->bi_size, remaining); - cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) @@ -1310,7 +1404,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_detain(pool->prison, &key, bio, &cell)) return; - if (bio_data_dir(bio) == WRITE) + if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; @@ -1362,6 +1456,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block default: DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + set_pool_mode(tc->pool, PM_READ_ONLY); cell_error(cell); break; } @@ -1419,6 +1514,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio) } } +static void process_bio_read_only(struct thin_c *tc, struct bio *bio) +{ + int r; + int rw = bio_data_dir(bio); + dm_block_t block = get_bio_block(tc, bio); + struct dm_thin_lookup_result lookup_result; + + r = dm_thin_find_block(tc->td, block, 1, &lookup_result); + switch (r) { + case 0: + if (lookup_result.shared && (rw == WRITE) && bio->bi_size) + bio_io_error(bio); + else + remap_and_issue(tc, bio, lookup_result.block); + break; + + case -ENODATA: + if (rw != READ) { + bio_io_error(bio); + break; + } + + if (tc->origin_dev) { + remap_to_origin_and_issue(tc, bio); + break; + } + + zero_fill_bio(bio); + bio_endio(bio, 0); + break; + + default: + DMERR("dm_thin_find_block() failed, error = %d", r); + bio_io_error(bio); + break; + } +} + +static void process_bio_fail(struct thin_c *tc, struct bio *bio) +{ + bio_io_error(bio); +} + static int need_commit_due_to_time(struct pool *pool) { return jiffies < pool->last_commit_jiffies || @@ -1430,7 +1568,6 @@ static void process_deferred_bios(struct pool *pool) unsigned long flags; struct bio *bio; struct bio_list bios; - int r; bio_list_init(&bios); @@ -1457,9 +1594,9 @@ static void process_deferred_bios(struct pool *pool) } if (bio->bi_rw & REQ_DISCARD) - process_discard(tc, bio); + pool->process_discard(tc, bio); else - process_bio(tc, bio); + pool->process_bio(tc, bio); } /* @@ -1475,10 +1612,7 @@ static void process_deferred_bios(struct pool *pool) if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) return; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); + if (commit_or_fallback(pool)) { while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; @@ -1493,8 +1627,8 @@ static void do_worker(struct work_struct *ws) { struct pool *pool = container_of(ws, struct pool, worker); - process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); - process_prepared(pool, &pool->prepared_discards, process_prepared_discard); + process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); + process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); process_deferred_bios(pool); } @@ -1511,6 +1645,52 @@ static void do_waker(struct work_struct *ws) /*----------------------------------------------------------------*/ +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void set_pool_mode(struct pool *pool, enum pool_mode mode) +{ + int r; + + pool->pf.mode = mode; + + switch (mode) { + case PM_FAIL: + DMERR("switching pool to failure mode"); + pool->process_bio = process_bio_fail; + pool->process_discard = process_bio_fail; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_fail; + break; + + case PM_READ_ONLY: + DMERR("switching pool to read-only mode"); + r = dm_pool_abort_metadata(pool->pmd); + if (r) { + DMERR("aborting transaction failed"); + set_pool_mode(pool, PM_FAIL); + } else { + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_passdown; + } + break; + + case PM_WRITE: + pool->process_bio = process_bio; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping; + pool->process_prepared_discard = process_prepared_discard; + break; + } +} + +/*----------------------------------------------------------------*/ + /* * Mapping functions. */ @@ -1556,6 +1736,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, struct dm_thin_lookup_result result; map_context->ptr = thin_hook_bio(tc, bio); + + if (get_pool_mode(tc->pool) == PM_FAIL) { + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { thin_defer_bio(tc, bio); return DM_MAPIO_SUBMITTED; @@ -1592,14 +1778,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, break; case -ENODATA: + if (get_pool_mode(tc->pool) == PM_READ_ONLY) { + /* + * This block isn't provisioned, and we have no way + * of doing so. Just error it. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; + } + /* fall through */ + + case -EWOULDBLOCK: /* * In future, the failed dm_thin_find_block above could * provide the hint to load the metadata into cache. */ - case -EWOULDBLOCK: thin_defer_bio(tc, bio); r = DM_MAPIO_SUBMITTED; break; + + default: + /* + * Must always call bio_io_error on failure. + * dm_thin_find_block can fail with -EINVAL if the + * pool is switched to fail-io mode. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; } return r; @@ -1636,15 +1843,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; + /* + * We want to make sure that degraded pools are never upgraded. + */ + enum pool_mode old_mode = pool->pf.mode; + enum pool_mode new_mode = pt->pf.mode; + + if (old_mode > new_mode) + new_mode = old_mode; + pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; pool->pf = pt->pf; + set_pool_mode(pool, new_mode); /* * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not; otherwise * -EOPNOTSUPP will be returned. */ + /* FIXME: pull this out into a sep fn. */ if (pt->pf.discard_passdown) { struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); if (!q || !blk_queue_discard(q)) { @@ -1670,6 +1888,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) /* Initialize pool features. */ static void pool_features_init(struct pool_features *pf) { + pf->mode = PM_WRITE; pf->zero_new_blocks = 1; pf->discard_enabled = 1; pf->discard_passdown = 1; @@ -1700,14 +1919,16 @@ static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error) + unsigned long block_size, + int read_only, char **error) { int r; void *err_p; struct pool *pool; struct dm_pool_metadata *pmd; + bool format_device = read_only ? false : true; - pmd = dm_pool_metadata_open(metadata_dev, block_size); + pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; @@ -1722,8 +1943,10 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->pmd = pmd; pool->sectors_per_block = block_size; - pool->block_shift = ffs(block_size) - 1; - pool->offset_mask = block_size - 1; + if (block_size & (block_size - 1)) + pool->sectors_per_block_shift = -1; + else + pool->sectors_per_block_shift = __ffs(block_size); pool->low_water_blocks = 0; pool_features_init(&pool->pf); pool->prison = prison_create(PRISON_CELLS); @@ -1822,25 +2045,29 @@ static void __pool_dec(struct pool *pool) static struct pool *__pool_find(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error, - int *created) + unsigned long block_size, int read_only, + char **error, int *created) { struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); if (pool) { - if (pool->pool_md != pool_md) + if (pool->pool_md != pool_md) { + *error = "metadata device already in use by a pool"; return ERR_PTR(-EBUSY); + } __pool_inc(pool); } else { pool = __pool_table_lookup(pool_md); if (pool) { - if (pool->md_dev != metadata_dev) + if (pool->md_dev != metadata_dev) { + *error = "different pool cannot replace a pool"; return ERR_PTR(-EINVAL); + } __pool_inc(pool); } else { - pool = pool_create(pool_md, metadata_dev, block_size, error); + pool = pool_create(pool_md, metadata_dev, block_size, read_only, error); *created = 1; } } @@ -1891,19 +2118,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, arg_name = dm_shift_arg(as); argc--; - if (!strcasecmp(arg_name, "skip_block_zeroing")) { + if (!strcasecmp(arg_name, "skip_block_zeroing")) pf->zero_new_blocks = 0; - continue; - } else if (!strcasecmp(arg_name, "ignore_discard")) { + + else if (!strcasecmp(arg_name, "ignore_discard")) pf->discard_enabled = 0; - continue; - } else if (!strcasecmp(arg_name, "no_discard_passdown")) { + + else if (!strcasecmp(arg_name, "no_discard_passdown")) pf->discard_passdown = 0; - continue; - } - ti->error = "Unrecognised pool feature requested"; - r = -EINVAL; + else if (!strcasecmp(arg_name, "read_only")) + pf->mode = PM_READ_ONLY; + + else { + ti->error = "Unrecognised pool feature requested"; + r = -EINVAL; + break; + } } return r; @@ -1967,7 +2198,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) if (kstrtoul(argv[2], 10, &block_size) || !block_size || block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || - !is_power_of_2(block_size)) { + block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { ti->error = "Invalid block size"; r = -EINVAL; goto out; @@ -1996,7 +2227,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) } pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, - block_size, &ti->error, &pool_created); + block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created); if (IS_ERR(pool)) { r = PTR_ERR(pool); goto out_free_pt; @@ -2014,6 +2245,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_flags_changed; } + /* + * The block layer requires discard_granularity to be a power of 2. + */ + if (pf.discard_enabled && !is_power_of_2(block_size)) { + ti->error = "Discard support must be disabled when the block size is not a power of 2"; + r = -EINVAL; + goto out_flags_changed; + } + pt->pool = pool; pt->ti = ti; pt->metadata_dev = metadata_dev; @@ -2033,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ - ti->discards_supported = 1; + ti->discards_supported = true; } ti->private = pt; @@ -2093,7 +2333,8 @@ static int pool_preresume(struct dm_target *ti) int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - dm_block_t data_size, sb_data_size; + sector_t data_size = ti->len; + dm_block_t sb_data_size; /* * Take control of the pool object. @@ -2102,7 +2343,8 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; - data_size = ti->len >> pool->block_shift; + (void) sector_div(data_size, pool->sectors_per_block); + r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); if (r) { DMERR("failed to retrieve data device size"); @@ -2111,22 +2353,19 @@ static int pool_preresume(struct dm_target *ti) if (data_size < sb_data_size) { DMERR("pool target too small, is %llu blocks (expected %llu)", - data_size, sb_data_size); + (unsigned long long)data_size, sb_data_size); return -EINVAL; } else if (data_size > sb_data_size) { r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); return r; } - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); } return 0; @@ -2149,19 +2388,12 @@ static void pool_resume(struct dm_target *ti) static void pool_postsuspend(struct dm_target *ti) { - int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - - r = dm_pool_commit_metadata(pool->pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/ - } + (void) commit_or_fallback(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2295,12 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2361,25 +2588,41 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) else DMWARN("Unrecognised thin pool target message received: %s", argv[0]); - if (!r) { - r = dm_pool_commit_metadata(pool->pmd); - if (r) - DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", - argv[0], r); - } + if (!r) + (void) commit_or_fallback(pool); return r; } +static void emit_flags(struct pool_features *pf, char *result, + unsigned sz, unsigned maxlen) +{ + unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + + !pf->discard_passdown + (pf->mode == PM_READ_ONLY); + DMEMIT("%u ", count); + + if (!pf->zero_new_blocks) + DMEMIT("skip_block_zeroing "); + + if (!pf->discard_enabled) + DMEMIT("ignore_discard "); + + if (!pf->discard_passdown) + DMEMIT("no_discard_passdown "); + + if (pf->mode == PM_READ_ONLY) + DMEMIT("read_only "); +} + /* * Status line is: * <transaction id> <used metadata sectors>/<total metadata sectors> * <used data sectors>/<total data sectors> <held metadata root> */ static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { - int r, count; + int r; unsigned sz = 0; uint64_t transaction_id; dm_block_t nr_free_blocks_data; @@ -2394,6 +2637,15 @@ static int pool_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: + if (get_pool_mode(pool) == PM_FAIL) { + DMEMIT("Fail"); + break; + } + + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit_or_fallback(pool); + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) @@ -2429,9 +2681,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)nr_blocks_data); if (held_root) - DMEMIT("%llu", held_root); + DMEMIT("%llu ", held_root); + else + DMEMIT("- "); + + if (pool->pf.mode == PM_READ_ONLY) + DMEMIT("ro "); + else + DMEMIT("rw "); + + if (pool->pf.discard_enabled && pool->pf.discard_passdown) + DMEMIT("discard_passdown"); else - DMEMIT("-"); + DMEMIT("no_discard_passdown"); break; @@ -2441,20 +2703,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - - count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + - !pt->pf.discard_passdown; - DMEMIT("%u ", count); - - if (!pool->pf.zero_new_blocks) - DMEMIT("skip_block_zeroing "); - - if (!pool->pf.discard_enabled) - DMEMIT("ignore_discard "); - - if (!pt->pf.discard_passdown) - DMEMIT("no_discard_passdown "); - + emit_flags(&pt->pf, result, sz, maxlen); break; } @@ -2492,7 +2741,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) /* * This is just a hint, and not enforced. We have to cope with - * bios that overlap 2 blocks. + * bios that cover a block partially. A discard that spans a block + * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->discard_zeroes_data = pool->pf.zero_new_blocks; @@ -2513,7 +2763,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2618,20 +2868,31 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) } __pool_inc(tc->pool); + if (get_pool_mode(tc->pool) == PM_FAIL) { + ti->error = "Couldn't open thin device, Pool is in fail mode"; + goto bad_thin_open; + } + r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); if (r) { ti->error = "Couldn't open thin internal device"; goto bad_thin_open; } - ti->split_io = tc->pool->sectors_per_block; + r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); + if (r) + goto bad_thin_open; + ti->num_flush_requests = 1; + ti->flush_supported = true; /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; + ti->discards_supported = true; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; + /* Discard requests must be split on a block boundary */ + ti->split_discard_requests = true; } dm_put(pool_md); @@ -2712,7 +2973,7 @@ static void thin_postsuspend(struct dm_target *ti) * <nr mapped sectors> <highest mapped sector> */ static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2720,6 +2981,11 @@ static int thin_status(struct dm_target *ti, status_type_t type, char buf[BDEVNAME_SIZE]; struct thin_c *tc = ti->private; + if (get_pool_mode(tc->pool) == PM_FAIL) { + DMEMIT("Fail"); + return 0; + } + if (!tc->td) DMEMIT("-"); else { @@ -2757,19 +3023,21 @@ static int thin_status(struct dm_target *ti, status_type_t type, static int thin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - dm_block_t blocks; + sector_t blocks; struct thin_c *tc = ti->private; + struct pool *pool = tc->pool; /* * We can't call dm_pool_get_data_dev_size() since that blocks. So * we follow a more convoluted path through to the pool's target. */ - if (!tc->pool->ti) + if (!pool->ti) return 0; /* nothing is bound */ - blocks = tc->pool->ti->len >> tc->pool->block_shift; + blocks = pool->ti->len; + (void) sector_div(blocks, pool->sectors_per_block); if (blocks) - return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data); + return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data); return 0; } @@ -2786,7 +3054,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 1, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d39b612..254d19268ad2 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, * Status: V (valid) or C (corruption found) */ static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e24143cc2040..4e09b6ff5b49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti static sector_t max_io_len(sector_t sector, struct dm_target *ti) { sector_t len = max_io_len_target_boundary(sector, ti); + sector_t offset, max_len; /* - * Does the target need to split even further ? + * Does the target need to split even further? */ - if (ti->split_io) { - sector_t boundary; - sector_t offset = dm_target_offset(ti, sector); - boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) - - offset; - if (len > boundary) - len = boundary; + if (ti->max_io_len) { + offset = dm_target_offset(ti, sector); + if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) + max_len = sector_div(offset, ti->max_io_len); + else + max_len = offset & (ti->max_io_len - 1); + max_len = ti->max_io_len - max_len; + + if (len > max_len) + len = max_len; } return len; } +int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) +{ + if (len > UINT_MAX) { + DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)", + (unsigned long long)len, UINT_MAX); + ti->error = "Maximum size of target IO is too large"; + return -EINVAL; + } + + ti->max_io_len = (uint32_t) len; + + return 0; +} +EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); + static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_target_io *tio) { @@ -1196,7 +1215,10 @@ static int __clone_and_map_discard(struct clone_info *ci) if (!ti->num_discard_requests) return -EOPNOTSUPP; - len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + if (!ti->split_discard_requests) + len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + else + len = min(ci->sector_count, max_io_len(ci->sector, ti)); __issue_target_requests(ci, ti, ti->num_discard_requests, len); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd59d8d7..52eef493d266 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -23,6 +23,11 @@ #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) /* + * Status feature flags + */ +#define DM_STATUS_NOFLUSH_FLAG (1 << 0) + +/* * Type of table and mapped_device's mempool */ #define DM_TYPE_NONE 0 diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index cfa95f662230..d8e7cb767c1e 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o dm-persistent-data-objs := \ dm-block-manager.o \ - dm-space-map-checker.o \ dm-space-map-common.o \ dm-space-map-disk.o \ dm-space-map-metadata.o \ diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0317ecdc6e53..5ba277768d99 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -325,11 +325,6 @@ static struct dm_buffer *to_buffer(struct dm_block *b) return (struct dm_buffer *) b; } -static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm) -{ - return (struct dm_bufio_client *) bm; -} - dm_block_t dm_block_location(struct dm_block *b) { return dm_bufio_get_block_number(to_buffer(b)); @@ -367,34 +362,60 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) /*---------------------------------------------------------------- * Public interface *--------------------------------------------------------------*/ +struct dm_block_manager { + struct dm_bufio_client *bufio; + bool read_only:1; +}; + struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, unsigned block_size, unsigned cache_size, unsigned max_held_per_thread) { - return (struct dm_block_manager *) - dm_bufio_client_create(bdev, block_size, max_held_per_thread, - sizeof(struct buffer_aux), - dm_block_manager_alloc_callback, - dm_block_manager_write_callback); + int r; + struct dm_block_manager *bm; + + bm = kmalloc(sizeof(*bm), GFP_KERNEL); + if (!bm) { + r = -ENOMEM; + goto bad; + } + + bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, + sizeof(struct buffer_aux), + dm_block_manager_alloc_callback, + dm_block_manager_write_callback); + if (IS_ERR(bm->bufio)) { + r = PTR_ERR(bm->bufio); + kfree(bm); + goto bad; + } + + bm->read_only = false; + + return bm; + +bad: + return ERR_PTR(r); } EXPORT_SYMBOL_GPL(dm_block_manager_create); void dm_block_manager_destroy(struct dm_block_manager *bm) { - return dm_bufio_client_destroy(to_bufio(bm)); + dm_bufio_client_destroy(bm->bufio); + kfree(bm); } EXPORT_SYMBOL_GPL(dm_block_manager_destroy); unsigned dm_bm_block_size(struct dm_block_manager *bm) { - return dm_bufio_get_block_size(to_bufio(bm)); + return dm_bufio_get_block_size(bm->bufio); } EXPORT_SYMBOL_GPL(dm_bm_block_size); dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) { - return dm_bufio_get_device_size(to_bufio(bm)); + return dm_bufio_get_device_size(bm->bufio); } static int dm_bm_validate_buffer(struct dm_block_manager *bm, @@ -406,7 +427,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, int r; if (!v) return 0; - r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm))); + r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); if (unlikely(r)) return r; aux->validator = v; @@ -430,7 +451,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -463,7 +484,10 @@ int dm_bm_write_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + if (bm->read_only) + return -EPERM; + + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -496,7 +520,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); if (unlikely(!p)) @@ -529,7 +553,10 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, struct buffer_aux *aux; void *p; - p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result); + if (bm->read_only) + return -EPERM; + + p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -547,6 +574,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return 0; } +EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); int dm_bm_unlock(struct dm_block *b) { @@ -565,45 +593,30 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n) -{ - struct buffer_aux *aux; - - aux = dm_bufio_get_aux_data(to_buffer(b)); - - if (aux->write_locked) { - dm_bufio_mark_buffer_dirty(to_buffer(b)); - bl_up_write(&aux->lock); - } else - bl_up_read(&aux->lock); - - dm_bufio_release_move(to_buffer(b), n); - return 0; -} - int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock) { int r; - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) + if (bm->read_only) + return -EPERM; + + r = dm_bufio_write_dirty_buffers(bm->bufio); + if (unlikely(r)) { + dm_bm_unlock(superblock); return r; + } dm_bm_unlock(superblock); - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) - return r; + return dm_bufio_write_dirty_buffers(bm->bufio); +} - return 0; +void dm_bm_set_read_only(struct dm_block_manager *bm) +{ + bm->read_only = true; } +EXPORT_SYMBOL_GPL(dm_bm_set_read_only); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 924833d2dfa6..be5bff61be28 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -97,14 +97,6 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, int dm_bm_unlock(struct dm_block *b); /* - * An optimisation; we often want to copy a block's contents to a new - * block. eg, as part of the shadowing operation. It's far better for - * bufio to do this move behind the scenes than hold 2 locks and memcpy the - * data. - */ -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); - -/* * It's a common idiom to have a superblock that should be committed last. * * @superblock should be write-locked on entry. It will be unlocked during @@ -116,6 +108,19 @@ int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); +/* + * Switches the bm to a read only mode. Once read-only mode + * has been entered the following functions will return -EPERM. + * + * dm_bm_write_lock + * dm_bm_write_lock_zero + * dm_bm_flush_and_unlock + * + * Additionally you should not use dm_bm_unlock_move, however no error will + * be returned if you do. + */ +void dm_bm_set_read_only(struct dm_block_manager *bm); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c deleted file mode 100644 index fc90c11620ad..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map-checker.h" - -#include <linux/device-mapper.h> -#include <linux/export.h> -#include <linux/vmalloc.h> - -#ifdef CONFIG_DM_DEBUG_SPACE_MAPS - -#define DM_MSG_PREFIX "space map checker" - -/*----------------------------------------------------------------*/ - -struct count_array { - dm_block_t nr; - dm_block_t nr_free; - - uint32_t *counts; -}; - -static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count) -{ - if (b >= ca->nr) - return -EINVAL; - - *count = ca->counts[b]; - return 0; -} - -static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r) -{ - if (b >= ca->nr) - return -EINVAL; - - *r = ca->counts[b] > 1; - return 0; -} - -static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count) -{ - uint32_t old_count; - - if (b >= ca->nr) - return -EINVAL; - - old_count = ca->counts[b]; - - if (!count && old_count) - ca->nr_free++; - - else if (count && !old_count) - ca->nr_free--; - - ca->counts[b] = count; - return 0; -} - -static int ca_inc_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - ca_set_count(ca, b, ca->counts[b] + 1); - return 0; -} - -static int ca_dec_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - BUG_ON(ca->counts[b] == 0); - ca_set_count(ca, b, ca->counts[b] - 1); - return 0; -} - -static int ca_create(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - dm_block_t nr_blocks; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - ca->nr = nr_blocks; - ca->nr_free = nr_blocks; - - if (!nr_blocks) - ca->counts = NULL; - else { - ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); - if (!ca->counts) - return -ENOMEM; - } - - return 0; -} - -static void ca_destroy(struct count_array *ca) -{ - vfree(ca->counts); -} - -static int ca_load(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - uint32_t count; - dm_block_t nr_blocks, i; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - BUG_ON(ca->nr != nr_blocks); - - DMWARN("Loading debug space map from disk. This may take some time"); - for (i = 0; i < nr_blocks; i++) { - r = dm_sm_get_count(sm, i, &count); - if (r) { - DMERR("load failed"); - return r; - } - - ca_set_count(ca, i, count); - } - DMWARN("Load complete"); - - return 0; -} - -static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) -{ - dm_block_t nr_blocks = ca->nr + extra_blocks; - uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); - if (!counts) - return -ENOMEM; - - if (ca->counts) { - memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); - ca_destroy(ca); - } - ca->nr = nr_blocks; - ca->nr_free += extra_blocks; - ca->counts = counts; - return 0; -} - -static int ca_commit(struct count_array *old, struct count_array *new) -{ - if (old->nr != new->nr) { - BUG_ON(old->nr > new->nr); - ca_extend(old, new->nr - old->nr); - } - - BUG_ON(old->nr != new->nr); - old->nr_free = new->nr_free; - memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr); - return 0; -} - -/*----------------------------------------------------------------*/ - -struct sm_checker { - struct dm_space_map sm; - - struct count_array old_counts; - struct count_array counts; - - struct dm_space_map *real_sm; -}; - -static void sm_checker_destroy(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - - dm_sm_destroy(smc->real_sm); - ca_destroy(&smc->old_counts); - ca_destroy(&smc->counts); - kfree(smc); -} - -static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_blocks(smc->real_sm, count); - if (!r) - BUG_ON(smc->old_counts.nr != *count); - return r; -} - -static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_free(smc->real_sm, count); - if (!r) { - /* - * Slow, but we know it's correct. - */ - dm_block_t b, n = 0; - for (b = 0; b < smc->old_counts.nr; b++) - if (smc->old_counts.counts[b] == 0 && - smc->counts.counts[b] == 0) - n++; - - if (n != *count) - DMERR("free block counts differ, checker %u, sm-disk:%u", - (unsigned) n, (unsigned) *count); - } - return r; -} - -static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_new_block(smc->real_sm, b); - - if (!r) { - BUG_ON(*b >= smc->old_counts.nr); - BUG_ON(smc->old_counts.counts[*b] != 0); - BUG_ON(*b >= smc->counts.nr); - BUG_ON(smc->counts.counts[*b] != 0); - ca_set_count(&smc->counts, *b, 1); - } - - return r; -} - -static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_inc_block(smc->real_sm, b); - int r2 = ca_inc_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_dec_block(smc->real_sm, b); - int r2 = ca_dec_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t result2 = 0; - int r = dm_sm_get_count(smc->real_sm, b, result); - int r2 = ca_get_count(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(*result != result2); - return r; -} - -static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int result2 = 0; - int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result); - int r2 = ca_count_more_than_one(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(!(*result) && result2); - return r; -} - -static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t old_rc; - int r = dm_sm_set_count(smc->real_sm, b, count); - int r2; - - BUG_ON(b >= smc->counts.nr); - old_rc = smc->counts.counts[b]; - r2 = ca_set_count(&smc->counts, b, count); - BUG_ON(r != r2); - - return r; -} - -static int sm_checker_commit(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r; - - r = dm_sm_commit(smc->real_sm); - if (r) - return r; - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) - return r; - - return 0; -} - -static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_extend(smc->real_sm, extra_blocks); - if (r) - return r; - - return ca_extend(&smc->counts, extra_blocks); -} - -static int sm_checker_root_size(struct dm_space_map *sm, size_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_root_size(smc->real_sm, result); -} - -static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len); -} - -/*----------------------------------------------------------------*/ - -static struct dm_space_map ops_ = { - .destroy = sm_checker_destroy, - .get_nr_blocks = sm_checker_get_nr_blocks, - .get_nr_free = sm_checker_get_nr_free, - .inc_block = sm_checker_inc_block, - .dec_block = sm_checker_dec_block, - .new_block = sm_checker_new_block, - .get_count = sm_checker_get_count, - .count_is_more_than_one = sm_checker_count_more_than_one, - .set_count = sm_checker_set_count, - .commit = sm_checker_commit, - .extend = sm_checker_extend, - .root_size = sm_checker_root_size, - .copy_root = sm_checker_copy_root -}; - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - - r = ca_load(&smc->counts, sm); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#else - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/drivers/md/persistent-data/dm-space-map-checker.h b/drivers/md/persistent-data/dm-space-map-checker.h deleted file mode 100644 index 444dccf6688c..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H -#define SNAPSHOTS_SPACE_MAP_CHECKER_H - -#include "dm-space-map.h" - -/*----------------------------------------------------------------*/ - -/* - * This space map wraps a real on-disk space map, and verifies all of its - * operations. It uses a lot of memory, so only use if you have a specific - * problem that you're debugging. - * - * Ownership of @sm passes. - */ -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm); -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index ff3beed6ad2d..d77602d63c83 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -224,6 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) ll->nr_blocks = 0; ll->bitmap_root = 0; ll->ref_count_root = 0; + ll->bitmap_index_changed = false; return 0; } @@ -476,7 +477,15 @@ int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) int sm_ll_commit(struct ll_disk *ll) { - return ll->commit(ll); + int r = 0; + + if (ll->bitmap_index_changed) { + r = ll->commit(ll); + if (!r) + ll->bitmap_index_changed = false; + } + + return r; } /*----------------------------------------------------------------*/ @@ -491,6 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie) { + ll->bitmap_index_changed = true; memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8f220821a9a9..b3078d5eda0c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -78,6 +78,7 @@ struct ll_disk { open_index_fn open_index; max_index_entries_fn max_entries; commit_fn commit; + bool bitmap_index_changed:1; }; struct disk_sm_root { diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 3d0ed5332883..f6d29e614ab7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -4,7 +4,6 @@ * This file is released under the GPL. */ -#include "dm-space-map-checker.h" #include "dm-space-map-common.h" #include "dm-space-map-disk.h" #include "dm-space-map.h" @@ -252,9 +251,8 @@ static struct dm_space_map ops = { .copy_root = sm_disk_copy_root }; -static struct dm_space_map *dm_sm_disk_create_real( - struct dm_transaction_manager *tm, - dm_block_t nr_blocks) +struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, + dm_block_t nr_blocks) { int r; struct sm_disk *smd; @@ -285,27 +283,10 @@ bad: kfree(smd); return ERR_PTR(r); } - -struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, - dm_block_t nr_blocks) -{ - struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); - struct dm_space_map *smc; - - if (IS_ERR_OR_NULL(sm)) - return sm; - - smc = dm_sm_checker_create_fresh(sm); - if (IS_ERR(smc)) - dm_sm_destroy(sm); - - return smc; -} EXPORT_SYMBOL_GPL(dm_sm_disk_create); -static struct dm_space_map *dm_sm_disk_open_real( - struct dm_transaction_manager *tm, - void *root_le, size_t len) +struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, + void *root_le, size_t len) { int r; struct sm_disk *smd; @@ -332,13 +313,6 @@ bad: kfree(smd); return ERR_PTR(r); } - -struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - return dm_sm_checker_create( - dm_sm_disk_open_real(tm, root_le, len)); -} EXPORT_SYMBOL_GPL(dm_sm_disk_open); /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index e5604b32d91f..d247a35da3c6 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -5,7 +5,6 @@ */ #include "dm-transaction-manager.h" #include "dm-space-map.h" -#include "dm-space-map-checker.h" #include "dm-space-map-disk.h" #include "dm-space-map-metadata.h" #include "dm-persistent-data-internal.h" @@ -220,13 +219,24 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, if (r < 0) return r; - r = dm_bm_unlock_move(orig_block, new); - if (r < 0) { + /* + * It would be tempting to use dm_bm_unlock_move here, but some + * code, such as the space maps, keeps using the old data structures + * secure in the knowledge they won't be changed until the next + * transaction. Using unlock_move would force a synchronous read + * since the old block would no longer be in the cache. + */ + r = dm_bm_write_lock_zero(tm->bm, new, v, result); + if (r) { dm_bm_unlock(orig_block); return r; } - return dm_bm_write_lock(tm->bm, new, v, result); + memcpy(dm_block_data(*result), dm_block_data(orig_block), + dm_bm_block_size(tm->bm)); + + dm_bm_unlock(orig_block); + return r; } int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, @@ -311,98 +321,61 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) static int dm_tm_create_internal(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, struct dm_transaction_manager **tm, struct dm_space_map **sm, - struct dm_block **sblock, - int create) + int create, + void *sm_root, size_t sm_len) { int r; - struct dm_space_map *inner; - inner = dm_sm_metadata_init(); - if (IS_ERR(inner)) - return PTR_ERR(inner); + *sm = dm_sm_metadata_init(); + if (IS_ERR(*sm)) + return PTR_ERR(*sm); - *tm = dm_tm_create(bm, inner); + *tm = dm_tm_create(bm, *sm); if (IS_ERR(*tm)) { - dm_sm_destroy(inner); + dm_sm_destroy(*sm); return PTR_ERR(*tm); } if (create) { - r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm), + r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm), sb_location); if (r) { DMERR("couldn't create metadata space map"); - goto bad2; - } - - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; + goto bad; } } else { - r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_open(inner, *tm, - dm_block_data(*sblock) + root_offset, - root_max_len); + r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); if (r) { DMERR("couldn't open metadata space map"); - goto bad2; - } - - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; + goto bad; } } return 0; -bad2: - dm_tm_unlock(*tm, *sblock); -bad1: +bad: dm_tm_destroy(*tm); - dm_sm_destroy(inner); + dm_sm_destroy(*sm); return r; } int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, - 0, 0, tm, sm, sblock, 1); + return dm_tm_create_internal(bm, sb_location, tm, sm, 1, NULL, 0); } EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, - root_max_len, tm, sm, sblock, 0); + return dm_tm_create_internal(bm, sb_location, tm, sm, 0, sm_root, root_len); } EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 6da784871db4..b5b139076ca5 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -115,16 +115,17 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); * * Returns a tm that has an open transaction to write the new disk sm. * Caller should store the new sm root and commit. + * + * The superblock location is passed so the metadata space map knows it + * shouldn't be used. */ int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); #endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 98f34b886f95..38d27a10aa5d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -66,14 +66,13 @@ typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); -typedef void (*dm_flush_fn) (struct dm_target *ti); typedef void (*dm_presuspend_fn) (struct dm_target *ti); typedef void (*dm_postsuspend_fn) (struct dm_target *ti); typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - char *result, unsigned int maxlen); + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); @@ -139,7 +138,6 @@ struct target_type { dm_map_request_fn map_rq; dm_endio_fn end_io; dm_request_endio_fn rq_end_io; - dm_flush_fn flush; dm_presuspend_fn presuspend; dm_postsuspend_fn postsuspend; dm_preresume_fn preresume; @@ -188,8 +186,8 @@ struct dm_target { sector_t begin; sector_t len; - /* Always a power of 2 */ - sector_t split_io; + /* If non-zero, maximum size of I/O submitted to a target. */ + uint32_t max_io_len; /* * A number of zero-length barrier requests that will be submitted @@ -214,15 +212,27 @@ struct dm_target { char *error; /* + * Set if this target needs to receive flushes regardless of + * whether or not its underlying devices have support. + */ + bool flush_supported:1; + + /* * Set if this target needs to receive discards regardless of * whether or not its underlying devices have support. */ - unsigned discards_supported:1; + bool discards_supported:1; + + /* + * Set if the target required discard request to be split + * on max_io_len boundary. + */ + bool split_discard_requests:1; /* * Set if this target does not return zeroes on discarded blocks. */ - unsigned discard_zeroes_data_unsupported:1; + bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ @@ -360,6 +370,11 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback int dm_table_complete(struct dm_table *t); /* + * Target may require that it is never sent I/O larger than len. + */ +int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); + +/* * Table reference counting. */ struct dm_table *dm_get_live_table(struct mapped_device *md); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 75fd5573516e..91e3a360f611 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 22 +#define DM_VERSION_MINOR 23 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-10-19)" +#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -307,6 +307,8 @@ enum { /* * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. */ #define DM_NOFLUSH_FLAG (1 << 11) /* In */ |