From d793e684277124d55c5d2444007e224635821346 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 10 May 2013 14:37:14 +0100 Subject: dm stripe: fix regression in stripe_width calculation Fix a regression in the calculation of the stripe_width in the dm stripe target which led to incorrect processing of device limits. The stripe_width is the stripe device length divided by the number of stripes. The group of commits in the range f14fa69 ("dm stripe: fix size test") to eb850de ("dm stripe: support for non power of 2 chunksize") interfered with each other (a merging error) and led to the stripe_width being set incorrectly to the stripe device length divided by chunk_size * stripe_count. For example, a stripe device's table with: 0 33553920 striped 3 512 ... should result in a stripe_width of 11184640 (33553920 / 3), but due to the bug it was getting set to 21845 (33553920 / (512 * 3)). The impact of this bug is that device topologies that previously worked fine with the stripe target are no longer considered valid. In particular, there is a higher risk of seeing this issue if one of the stripe devices has a 4K logical block size. Resulting in an error message like this: "device-mapper: table: 253:4: len=21845 not aligned to h/w logical block size 4096 of dm-1" The fix is to swap the order of the divisions and to use a temporary variable for the second one, so that width retains the intended value. Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.6+ Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index ea5e878a30b9..d907ca6227ce 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -94,7 +94,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct stripe_c *sc; - sector_t width; + sector_t width, tmp_len; uint32_t stripes; uint32_t chunk_size; int r; @@ -116,15 +116,16 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } width = ti->len; - if (sector_div(width, chunk_size)) { + if (sector_div(width, stripes)) { ti->error = "Target length not divisible by " - "chunk size"; + "number of stripes"; return -EINVAL; } - if (sector_div(width, stripes)) { + tmp_len = width; + if (sector_div(tmp_len, chunk_size)) { ti->error = "Target length not divisible by " - "number of stripes"; + "chunk size"; return -EINVAL; } -- cgit v1.2.3 From fa4d683af3693863bec761e2761a07e4c1351f86 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 10 May 2013 14:37:14 +0100 Subject: dm cache: fix error return code in cache_create Return -ENOMEM if memory allocation fails in cache_create instead of 0 (to avoid NULL pointer dereference). Signed-off-by: Wei Yongjun Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 10744091e6ca..6feaba24fcac 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1971,6 +1971,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) atomic_set(&cache->nr_migrations, 0); init_waitqueue_head(&cache->migration_wait); + r = -ENOMEM; cache->nr_dirty = 0; cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); if (!cache->dirty_bitset) { -- cgit v1.2.3 From 09e8b813897a0f85bb401435d009228644c81214 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 10 May 2013 14:37:15 +0100 Subject: dm snapshot: fix error return code in snapshot_ctr Return -ENOMEM instead of success if unable to allocate pending exception mempool in snapshot_ctr. Signed-off-by: Wei Yongjun Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c0e07026a8d1..c434e5aab2df 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1121,6 +1121,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); if (!s->pending_pool) { ti->error = "Could not allocate mempool for pending exceptions"; + r = -ENOMEM; goto bad_pending_pool; } -- cgit v1.2.3 From 502624bdad3dba45dfaacaf36b7d83e39e74b2d2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 10 May 2013 14:37:15 +0100 Subject: dm bufio: avoid a possible __vmalloc deadlock This patch uses memalloc_noio_save to avoid a possible deadlock in dm-bufio. (it could happen only with large block size, at most PAGE_SIZE << MAX_ORDER (typically 8MiB). __vmalloc doesn't fully respect gfp flags. The specified gfp flags are used for allocation of requested pages, structures vmap_area, vmap_block and vm_struct and the radix tree nodes. However, the kernel pagetables are allocated always with GFP_KERNEL. Thus the allocation of pagetables can recurse back to the I/O layer and cause a deadlock. This patch uses the function memalloc_noio_save to set per-process PF_MEMALLOC_NOIO flag and the function memalloc_noio_restore to restore it. When this flag is set, all allocations in the process are done with implied GFP_NOIO flag, thus the deadlock can't happen. This should be backported to stable kernels, but they don't have the PF_MEMALLOC_NOIO flag and memalloc_noio_save/memalloc_noio_restore functions. So, PF_MEMALLOC should be set and restored instead. Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-bufio.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index c6083132c4b8..0387e05cdb98 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -319,6 +319,9 @@ static void __cache_size_refresh(void) static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, enum data_mode *data_mode) { + unsigned noio_flag; + void *ptr; + if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { *data_mode = DATA_MODE_SLAB; return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); @@ -332,7 +335,26 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, } *data_mode = DATA_MODE_VMALLOC; - return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); + + /* + * __vmalloc allocates the data pages and auxiliary structures with + * gfp_flags that were specified, but pagetables are always allocated + * with GFP_KERNEL, no matter what was specified as gfp_mask. + * + * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that + * all allocations done by this process (including pagetables) are done + * as if GFP_NOIO was specified. + */ + + if (gfp_mask & __GFP_NORETRY) + noio_flag = memalloc_noio_save(); + + ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); + + if (gfp_mask & __GFP_NORETRY) + memalloc_noio_restore(noio_flag); + + return ptr; } /* -- cgit v1.2.3 From dc019b21fb92d620a3b52ccecc135ac968a7c7ec Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 10 May 2013 14:37:16 +0100 Subject: dm table: fix write same support If device_not_write_same_capable() returns true then the iterate_devices loop in dm_table_supports_write_same() should return false. Reported-by: Bharata B Rao Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v3.8+ Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e50dad0c65f4..1ff252ab7d46 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1442,7 +1442,7 @@ static bool dm_table_supports_write_same(struct dm_table *t) return false; if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) return false; } -- cgit v1.2.3 From 042bcef889f4319d366c67fb694f1f2498fcb68e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 10 May 2013 14:37:16 +0100 Subject: dm mpath: enable WRITE SAME support Enable WRITE SAME support in dm multipath. As far as multipath is concerned it is just another write request. Signed-off-by: Mike Snitzer Tested-by: Bharata B Rao Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 51bb81676be3..bdf26f5bd326 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -907,6 +907,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, ti->num_flush_bios = 1; ti->num_discard_bios = 1; + ti->num_write_same_bios = 1; return 0; -- cgit v1.2.3 From f8350daf7af05e3b8cf98e9550de3f623af03fe7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:16 +0100 Subject: dm cache: tune migration throttling Tune the dm cache migration throttling. i) Issue a tick every second, just in case there's no i/o going through. ii) Drop the migration threshold right down to something suitable for background work. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 6feaba24fcac..c3c18527da39 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1445,6 +1445,7 @@ static void do_worker(struct work_struct *ws) static void do_waker(struct work_struct *ws) { struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); + policy_tick(cache->policy); wake_worker(cache); queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); } @@ -1886,7 +1887,7 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size, return discard_block_size; } -#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) +#define DEFAULT_MIGRATION_THRESHOLD 2048 static int cache_create(struct cache_args *ca, struct cache **result) { -- cgit v1.2.3 From 88a488f6243c98b38ac5191d4255e09d3b1c6455 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:17 +0100 Subject: dm persistent data: fix error message typos Fix some typos in dm-space-map-metadata.c error messages. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-metadata.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 906cf3df71af..d87a30a243db 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -410,7 +410,7 @@ static void sm_bootstrap_destroy(struct dm_space_map *sm) static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) { - DMERR("boostrap doesn't support extend"); + DMERR("bootstrap doesn't support extend"); return -EINVAL; } @@ -450,7 +450,7 @@ static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) { - DMERR("boostrap doesn't support set_count"); + DMERR("bootstrap doesn't support set_count"); return -EINVAL; } @@ -491,7 +491,7 @@ static int sm_bootstrap_commit(struct dm_space_map *sm) static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) { - DMERR("boostrap doesn't support root_size"); + DMERR("bootstrap doesn't support root_size"); return -EINVAL; } @@ -499,7 +499,7 @@ static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, size_t max) { - DMERR("boostrap doesn't support copy_root"); + DMERR("bootstrap doesn't support copy_root"); return -EINVAL; } -- cgit v1.2.3 From 058ce5ca8155a4c8eeac9b9e8a70e6664996337b Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 May 2013 14:37:17 +0100 Subject: dm: document iterate_devices Document iterate_devices in device-mapper.h. Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 1e483fa7afb4..3cd32478f2fd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -79,11 +79,26 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, struct bio_vec *biovec, int max_size); +/* + * These iteration functions are typically used to check (and combine) + * properties of underlying devices. + * E.g. Does at least one underlying device support flush? + * Does any underlying device not support WRITE_SAME? + * + * The callout function is called once for each contiguous section of + * an underlying device. State can be maintained in *data. + * Return non-zero to stop iterating through any further devices. + */ typedef int (*iterate_devices_callout_fn) (struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data); +/* + * This function must iterate through each section of device used by the + * target until it encounters a non-zero return code, which it then returns. + * Returns zero if no callout returned non-zero. + */ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, iterate_devices_callout_fn fn, void *data); -- cgit v1.2.3 From e12c1fd9d680f6b1181ae13efefc416743bd80e5 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 May 2013 14:37:17 +0100 Subject: dm cache policy: fix description of lookup fn Correct the documented requirement on the return code from dm cache policy lookup functions stated in the policy module header file. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-policy.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index 558bdfdabf5f..33369ca9614f 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -130,8 +130,8 @@ struct dm_cache_policy { * * Must not block. * - * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK - * would be typical). + * Returns 0 if in cache, -ENOENT if not, < 0 for other errors + * (-EWOULDBLOCK would be typical). */ int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); -- cgit v1.2.3 From aeed1420a39afb9bd4b7acfcda7a573e349bf27a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:18 +0100 Subject: dm cache: fix typos in comments Fix up some typos in dm-cache comments. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c3c18527da39..004d7053a5b1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -205,7 +205,7 @@ struct per_bio_data { /* * writethrough fields. These MUST remain at the end of this * structure and the 'cache' member must be the first as it - * is used to determine the offsetof the writethrough fields. + * is used to determine the offset of the writethrough fields. */ struct cache *cache; dm_cblock_t cblock; @@ -393,7 +393,7 @@ static int get_cell(struct cache *cache, return r; } - /*----------------------------------------------------------------*/ +/*----------------------------------------------------------------*/ static bool is_dirty(struct cache *cache, dm_cblock_t b) { @@ -419,6 +419,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl } /*----------------------------------------------------------------*/ + static bool block_size_is_power_of_two(struct cache *cache) { return cache->sectors_per_block_shift >= 0; @@ -667,7 +668,7 @@ static void writethrough_endio(struct bio *bio, int err) /* * We can't issue this bio directly, since we're in interrupt - * context. So it get's put on a bio list for processing by the + * context. So it gets put on a bio list for processing by the * worker thread. */ defer_writethrough_bio(pb->cache, bio); -- cgit v1.2.3 From 8c5008fac4af85c7a597c4e7f2c328ac90652bc2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:18 +0100 Subject: dm cache: replace memcpy with struct assignment Use struct assignment rather than memcpy in dm cache. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-metadata.c | 4 ++-- drivers/md/dm-cache-target.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 83e995fece88..1af7255bbffb 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1044,7 +1044,7 @@ void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, struct dm_cache_statistics *stats) { down_read(&cmd->root_lock); - memcpy(stats, &cmd->stats, sizeof(*stats)); + *stats = cmd->stats; up_read(&cmd->root_lock); } @@ -1052,7 +1052,7 @@ void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, struct dm_cache_statistics *stats) { down_write(&cmd->root_lock); - memcpy(&cmd->stats, stats, sizeof(*stats)); + cmd->stats = *stats; up_write(&cmd->root_lock); } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 004d7053a5b1..2f3348c76267 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1913,7 +1913,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discards_supported = true; ti->discard_zeroes_data_unsupported = true; - memcpy(&cache->features, &ca->features, sizeof(cache->features)); + cache->features = ca->features; ti->per_bio_data_size = get_per_bio_data_size(cache); cache->callbacks.congested_fn = cache_is_congested; -- cgit v1.2.3 From b17446df2ebcaf32889376d90f4b9b2baebb2db6 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:18 +0100 Subject: dm thin: refactor data dev resize Refactor device size functions in preparation for similar metadata device resizing functions. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 10 ++--- drivers/md/dm-thin.c | 87 +++++++++++++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 00cee02f8fc9..9452a489ed99 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1645,12 +1645,12 @@ int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, return r; } -static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count) { int r; dm_block_t old_count; - r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); + r = dm_sm_get_nr_blocks(sm, &old_count); if (r) return r; @@ -1658,11 +1658,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return 0; if (new_count < old_count) { - DMERR("cannot reduce size of data device"); + DMERR("cannot reduce size of space map"); return -EINVAL; } - return dm_sm_extend(pmd->data_sm, new_count - old_count); + return dm_sm_extend(sm, new_count - old_count); } int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) @@ -1671,7 +1671,7 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) down_write(&pmd->root_lock); if (!pmd->fail_io) - r = __resize_data_dev(pmd, new_count); + r = __resize_space_map(pmd->data_sm, new_count); up_write(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 004ad1652b73..111c148fb1d0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -922,7 +922,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) return r; if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { - DMWARN("%s: reached low water mark, sending event.", + DMWARN("%s: reached low water mark for data device: sending event.", dm_device_name(pool->pool_md)); spin_lock_irqsave(&pool->lock, flags); pool->low_water_triggered = 1; @@ -1909,6 +1909,20 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, return r; } +static sector_t get_metadata_dev_size(struct block_device *bdev) +{ + sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + char buffer[BDEVNAME_SIZE]; + + if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) { + DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", + bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS); + metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING; + } + + return metadata_dev_size; +} + /* * thin-pool * @@ -1931,8 +1945,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned long block_size; dm_block_t low_water_blocks; struct dm_dev *metadata_dev; - sector_t metadata_dev_size; - char b[BDEVNAME_SIZE]; /* * FIXME Remove validation from scope of lock. @@ -1953,10 +1965,11 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_unlock; } - metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; - if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", - bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); + /* + * Run for the side-effect of possibly issuing a warning if the + * device is too big. + */ + (void) get_metadata_dev_size(metadata_dev->bdev); r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); if (r) { @@ -2079,18 +2092,7 @@ static int pool_map(struct dm_target *ti, struct bio *bio) return r; } -/* - * Retrieves the number of blocks of the data device from - * the superblock and compares it to the actual device size, - * thus resizing the data device in case it has grown. - * - * This both copes with opening preallocated data devices in the ctr - * being followed by a resume - * -and- - * calling the resume method individually after userspace has - * grown the data device in reaction to a table event. - */ -static int pool_preresume(struct dm_target *ti) +static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) { int r; struct pool_c *pt = ti->private; @@ -2098,12 +2100,7 @@ static int pool_preresume(struct dm_target *ti) sector_t data_size = ti->len; dm_block_t sb_data_size; - /* - * Take control of the pool object. - */ - r = bind_control_target(pool, ti); - if (r) - return r; + *need_commit = false; (void) sector_div(data_size, pool->sectors_per_block); @@ -2114,7 +2111,7 @@ static int pool_preresume(struct dm_target *ti) } if (data_size < sb_data_size) { - DMERR("pool target too small, is %llu blocks (expected %llu)", + DMERR("pool target (%llu blocks) too small: expected %llu", (unsigned long long)data_size, sb_data_size); return -EINVAL; @@ -2122,17 +2119,51 @@ static int pool_preresume(struct dm_target *ti) r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); - /* FIXME Stricter than necessary: Rollback transaction instead here */ set_pool_mode(pool, PM_READ_ONLY); return r; } - (void) commit_or_fallback(pool); + *need_commit = true; } return 0; } +/* + * Retrieves the number of blocks of the data device from + * the superblock and compares it to the actual device size, + * thus resizing the data device in case it has grown. + * + * This both copes with opening preallocated data devices in the ctr + * being followed by a resume + * -and- + * calling the resume method individually after userspace has + * grown the data device in reaction to a table event. + */ +static int pool_preresume(struct dm_target *ti) +{ + int r; + bool need_commit1; + struct pool_c *pt = ti->private; + struct pool *pool = pt->pool; + + /* + * Take control of the pool object. + */ + r = bind_control_target(pool, ti); + if (r) + return r; + + r = maybe_resize_data_dev(ti, &need_commit1); + if (r) + return r; + + if (need_commit1) + (void) commit_or_fallback(pool); + + return 0; +} + static void pool_resume(struct dm_target *ti) { struct pool_c *pt = ti->private; -- cgit v1.2.3 From 5d0db96d13a4e2cd22b52494fb19ce5a9c8b8d90 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:19 +0100 Subject: dm thin: open dev read only when possible If a thin pool is created in read-only-metadata mode then only open the metadata device read-only. Previously it was always opened with FMODE_READ | FMODE_WRITE. (Note that dm_get_device() still allows read-only dm devices to be used read-write at the moment: If I create a read-only linear device for the metadata, via dmsetup load --readonly, then I can still create a rw pool out of it.) Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 111c148fb1d0..ef021b0c8106 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1945,6 +1945,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned long block_size; dm_block_t low_water_blocks; struct dm_dev *metadata_dev; + fmode_t metadata_mode; /* * FIXME Remove validation from scope of lock. @@ -1956,10 +1957,22 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) r = -EINVAL; goto out_unlock; } + as.argc = argc; as.argv = argv; - r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev); + /* + * Set default pool features. + */ + pool_features_init(&pf); + + dm_consume_args(&as, 4); + r = parse_pool_features(&as, &pf, ti); + if (r) + goto out_unlock; + + metadata_mode = FMODE_READ | ((pf.mode == PM_READ_ONLY) ? 0 : FMODE_WRITE); + r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev); if (r) { ti->error = "Error opening metadata block device"; goto out_unlock; @@ -1992,16 +2005,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out; } - /* - * Set default pool features. - */ - pool_features_init(&pf); - - dm_consume_args(&as, 4); - r = parse_pool_features(&as, &pf, ti); - if (r) - goto out; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); if (!pt) { r = -ENOMEM; -- cgit v1.2.3 From 1921c56d95c4ac92b359ad44ffbc1e9a36060b29 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:19 +0100 Subject: dm persistent data: support space map resizing Support extending a dm persistent data metadata space map. The extend itself is implemented by switching back to the boostrap allocator and pointing to the new space. The extra bitmap indexes are then allocated from the new space, and finally we switch back to the proper space map ops and tweak the reference counts. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-metadata.c | 38 ++++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index d87a30a243db..51ca9edef444 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -144,12 +144,6 @@ static void sm_metadata_destroy(struct dm_space_map *sm) kfree(smm); } -static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - DMERR("doesn't support extend"); - return -EINVAL; -} - static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); @@ -382,6 +376,8 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t return 0; } +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); + static struct dm_space_map ops = { .destroy = sm_metadata_destroy, .extend = sm_metadata_extend, @@ -522,6 +518,36 @@ static struct dm_space_map bootstrap_ops = { /*----------------------------------------------------------------*/ +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +{ + int r, i; + enum allocation_event ev; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + dm_block_t old_len = smm->ll.nr_blocks; + + /* + * Flick into a mode where all blocks get allocated in the new area. + */ + smm->begin = old_len; + memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); + + /* + * Extend. + */ + r = sm_ll_extend(&smm->ll, extra_blocks); + + /* + * Switch back to normal behaviour. + */ + memcpy(&smm->sm, &ops, sizeof(smm->sm)); + for (i = old_len; !r && i < smm->begin; i++) + r = sm_ll_inc(&smm->ll, i, &ev); + + return r; +} + +/*----------------------------------------------------------------*/ + struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; -- cgit v1.2.3 From 24347e9595704464f62a4ed8f46abf62b4c79cdd Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:19 +0100 Subject: dm thin: detect metadata device resizing Allow the dm thin pool metadata device to be extended. Whenever a pool is resumed, detect whether the size of the metadata device has increased, and if so, extend the metadata to use the new space. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 12 ++++++++++ drivers/md/dm-thin-metadata.h | 1 + drivers/md/dm-thin.c | 54 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9452a489ed99..f553ed66603c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1677,6 +1677,18 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return r; } +int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = __resize_space_map(pmd->metadata_sm, new_count); + up_write(&pmd->root_lock); + + return r; +} + void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) { down_write(&pmd->root_lock); diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 0cecc3702885..ef8dd709e34e 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -185,6 +185,7 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); * blocks would be lost. */ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); +int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); /* * Flicks the underlying block manager into read only mode, so you know diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ef021b0c8106..f4632f97bd7b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1923,6 +1923,15 @@ static sector_t get_metadata_dev_size(struct block_device *bdev) return metadata_dev_size; } +static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev) +{ + sector_t metadata_dev_size = get_metadata_dev_size(bdev); + + sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + + return metadata_dev_size; +} + /* * thin-pool * @@ -2132,6 +2141,41 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) return 0; } +static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) +{ + int r; + struct pool_c *pt = ti->private; + struct pool *pool = pt->pool; + dm_block_t metadata_dev_size, sb_metadata_dev_size; + + *need_commit = false; + + metadata_dev_size = get_metadata_dev_size(pool->md_dev); + + r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size); + if (r) { + DMERR("failed to retrieve data device size"); + return r; + } + + if (metadata_dev_size < sb_metadata_dev_size) { + DMERR("metadata device (%llu sectors) too small: expected %llu", + metadata_dev_size, sb_metadata_dev_size); + return -EINVAL; + + } else if (metadata_dev_size > sb_metadata_dev_size) { + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); + if (r) { + DMERR("failed to resize metadata device"); + return r; + } + + *need_commit = true; + } + + return 0; +} + /* * Retrieves the number of blocks of the data device from * the superblock and compares it to the actual device size, @@ -2146,7 +2190,7 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) static int pool_preresume(struct dm_target *ti) { int r; - bool need_commit1; + bool need_commit1, need_commit2; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; @@ -2161,7 +2205,11 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; - if (need_commit1) + r = maybe_resize_metadata_dev(ti, &need_commit2); + if (r) + return r; + + if (need_commit1 || need_commit2) (void) commit_or_fallback(pool); return 0; @@ -2583,7 +2631,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 7, 0}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, -- cgit v1.2.3 From 7c3d3f2a87b01ff167a5f048285d5e3dee920235 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:20 +0100 Subject: dm persistent data: add threshold callback to space map Add a threshold callback function to the persistent data space map interface for a subsequent patch to use. dm-thin and dm-cache are interested in knowing when they're getting low on metadata or data blocks. This patch introduces a new method for registering a callback against a threshold. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-disk.c | 3 ++- drivers/md/persistent-data/dm-space-map-metadata.c | 6 ++++-- drivers/md/persistent-data/dm-space-map.h | 23 ++++++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index f6d29e614ab7..e735a6d5a793 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -248,7 +248,8 @@ static struct dm_space_map ops = { .new_block = sm_disk_new_block, .commit = sm_disk_commit, .root_size = sm_disk_root_size, - .copy_root = sm_disk_copy_root + .copy_root = sm_disk_copy_root, + .register_threshold_callback = NULL }; struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 51ca9edef444..883b465794d4 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -391,7 +391,8 @@ static struct dm_space_map ops = { .new_block = sm_metadata_new_block, .commit = sm_metadata_commit, .root_size = sm_metadata_root_size, - .copy_root = sm_metadata_copy_root + .copy_root = sm_metadata_copy_root, + .register_threshold_callback = NULL }; /*----------------------------------------------------------------*/ @@ -513,7 +514,8 @@ static struct dm_space_map bootstrap_ops = { .new_block = sm_bootstrap_new_block, .commit = sm_bootstrap_commit, .root_size = sm_bootstrap_root_size, - .copy_root = sm_bootstrap_copy_root + .copy_root = sm_bootstrap_copy_root, + .register_threshold_callback = NULL }; /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h index 1cbfc6b1638a..3e6d1153b7c4 100644 --- a/drivers/md/persistent-data/dm-space-map.h +++ b/drivers/md/persistent-data/dm-space-map.h @@ -9,6 +9,8 @@ #include "dm-block-manager.h" +typedef void (*dm_sm_threshold_fn)(void *context); + /* * struct dm_space_map keeps a record of how many times each block in a device * is referenced. It needs to be fixed on disk as part of the transaction. @@ -59,6 +61,15 @@ struct dm_space_map { */ int (*root_size)(struct dm_space_map *sm, size_t *result); int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len); + + /* + * You can register one threshold callback which is edge-triggered + * when the free space in the space map drops below the threshold. + */ + int (*register_threshold_callback)(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context); }; /*----------------------------------------------------------------*/ @@ -131,4 +142,16 @@ static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le return sm->copy_root(sm, copy_to_here_le, len); } +static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + if (sm->register_threshold_callback) + return sm->register_threshold_callback(sm, threshold, fn, context); + + return -EINVAL; +} + + #endif /* _LINUX_DM_SPACE_MAP_H */ -- cgit v1.2.3 From 2fc48021f4afdd109b9e52b6eef5db89ca80bac7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:20 +0100 Subject: dm persistent metadata: add space map threshold callback Add a threshold callback to dm persistent data space maps. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-metadata.c | 77 +++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 883b465794d4..1c959684caef 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -16,6 +16,55 @@ /*----------------------------------------------------------------*/ +/* + * An edge triggered threshold. + */ +struct threshold { + bool threshold_set; + bool value_set; + dm_block_t threshold; + dm_block_t current_value; + dm_sm_threshold_fn fn; + void *context; +}; + +static void threshold_init(struct threshold *t) +{ + t->threshold_set = false; + t->value_set = false; +} + +static void set_threshold(struct threshold *t, dm_block_t value, + dm_sm_threshold_fn fn, void *context) +{ + t->threshold_set = true; + t->threshold = value; + t->fn = fn; + t->context = context; +} + +static bool below_threshold(struct threshold *t, dm_block_t value) +{ + return t->threshold_set && value <= t->threshold; +} + +static bool threshold_already_triggered(struct threshold *t) +{ + return t->value_set && below_threshold(t, t->current_value); +} + +static void check_threshold(struct threshold *t, dm_block_t value) +{ + if (below_threshold(t, value) && + !threshold_already_triggered(t)) + t->fn(t->context); + + t->value_set = true; + t->current_value = value; +} + +/*----------------------------------------------------------------*/ + /* * Space map interface. * @@ -54,6 +103,8 @@ struct sm_metadata { unsigned allocated_this_transaction; unsigned nr_uncommitted; struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + + struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) @@ -329,9 +380,19 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { + dm_block_t count; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + int r = sm_metadata_new_block_(sm, b); if (r) DMERR("unable to allocate new metadata block"); + + r = sm_metadata_get_nr_free(sm, &count); + if (r) + DMERR("couldn't get free block count"); + + check_threshold(&smm->threshold, count); + return r; } @@ -351,6 +412,18 @@ static int sm_metadata_commit(struct dm_space_map *sm) return 0; } +static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + + set_threshold(&smm->threshold, threshold, fn, context); + + return 0; +} + static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) { *result = sizeof(struct disk_sm_root); @@ -392,7 +465,7 @@ static struct dm_space_map ops = { .commit = sm_metadata_commit, .root_size = sm_metadata_root_size, .copy_root = sm_metadata_copy_root, - .register_threshold_callback = NULL + .register_threshold_callback = sm_metadata_register_threshold_callback }; /*----------------------------------------------------------------*/ @@ -577,6 +650,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, smm->recursion_count = 0; smm->allocated_this_transaction = 0; smm->nr_uncommitted = 0; + threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -618,6 +692,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm, smm->recursion_count = 0; smm->allocated_this_transaction = 0; smm->nr_uncommitted = 0; + threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); return 0; -- cgit v1.2.3 From ac8c3f3df65e487bbcabf274eeeb9cd222f5da1e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:21 +0100 Subject: dm thin: generate event when metadata threshold passed Generate a dm event when the amount of remaining thin pool metadata space falls below a certain level. The threshold is taken to be a quarter of the size of the metadata device with a minimum threshold of 4MB. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 14 ++++++++++++++ drivers/md/dm-thin-metadata.h | 6 ++++++ drivers/md/dm-thin.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index f553ed66603c..60bce435f4fa 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1696,3 +1696,17 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) dm_bm_set_read_only(pmd->bm); up_write(&pmd->root_lock); } + +int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + int r; + + down_write(&pmd->root_lock); + r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); + up_write(&pmd->root_lock); + + return r; +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index ef8dd709e34e..845ebbe589a9 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -8,6 +8,7 @@ #define DM_THIN_METADATA_H #include "persistent-data/dm-block-manager.h" +#include "persistent-data/dm-space-map.h" #define THIN_METADATA_BLOCK_SIZE 4096 @@ -193,6 +194,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_siz */ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); +int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context); + /*----------------------------------------------------------------*/ #endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f4632f97bd7b..759cffc45cab 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1281,6 +1281,10 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio) bio_io_error(bio); } +/* + * FIXME: should we also commit due to size of transaction, measured in + * metadata blocks? + */ static int need_commit_due_to_time(struct pool *pool) { return jiffies < pool->last_commit_jiffies || @@ -1909,6 +1913,16 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, return r; } +static void metadata_low_callback(void *context) +{ + struct pool *pool = context; + + DMWARN("%s: reached low water mark for metadata device: sending event.", + dm_device_name(pool->pool_md)); + + dm_table_event(pool->ti->table); +} + static sector_t get_metadata_dev_size(struct block_device *bdev) { sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; @@ -1932,6 +1946,23 @@ static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev) return metadata_dev_size; } +/* + * When a metadata threshold is crossed a dm event is triggered, and + * userland should respond by growing the metadata device. We could let + * userland set the threshold, like we do with the data threshold, but I'm + * not sure they know enough to do this well. + */ +static dm_block_t calc_metadata_threshold(struct pool_c *pt) +{ + /* + * 4M is ample for all ops with the possible exception of thin + * device deletion which is harmless if it fails (just retry the + * delete after you've grown the device). + */ + dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4; + return min((dm_block_t)1024ULL /* 4M */, quarter); +} + /* * thin-pool * @@ -2065,6 +2096,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) } ti->private = pt; + r = dm_pool_register_metadata_threshold(pt->pool->pmd, + calc_metadata_threshold(pt), + metadata_low_callback, + pool); + if (r) + goto out_free_pt; + pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); -- cgit v1.2.3 From 2c73c471fb3b1e127df1efda506e796b83da44d2 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 10 May 2013 14:37:21 +0100 Subject: dm cache: move config fns Move process_config_option() in dm-cache-target.c to make the next patch more readable. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2f3348c76267..5159e25ea082 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1811,6 +1811,23 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv, static struct kmem_cache *migration_cache; +#define NOT_CORE_OPTION 1 + +static int process_config_option(struct cache *cache, char **argv) +{ + unsigned long tmp; + + if (!strcasecmp(argv[0], "migration_threshold")) { + if (kstrtoul(argv[1], 10, &tmp)) + return -EINVAL; + + cache->migration_threshold = tmp; + return 0; + } + + return NOT_CORE_OPTION; +} + static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) { int r = 0; @@ -2520,23 +2537,6 @@ err: DMEMIT("Error"); } -#define NOT_CORE_OPTION 1 - -static int process_config_option(struct cache *cache, char **argv) -{ - unsigned long tmp; - - if (!strcasecmp(argv[0], "migration_threshold")) { - if (kstrtoul(argv[1], 10, &tmp)) - return -EINVAL; - - cache->migration_threshold = tmp; - return 0; - } - - return NOT_CORE_OPTION; -} - /* * Supports . * -- cgit v1.2.3 From 2f14f4b51ed34fe2b704af8df178f5cd8c81f65e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 10 May 2013 14:37:21 +0100 Subject: dm cache: set config value Share configuration option processing code between the dm cache ctr and message functions. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 59 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5159e25ea082..df44b60e66f2 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1813,12 +1813,12 @@ static struct kmem_cache *migration_cache; #define NOT_CORE_OPTION 1 -static int process_config_option(struct cache *cache, char **argv) +static int process_config_option(struct cache *cache, const char *key, const char *value) { unsigned long tmp; - if (!strcasecmp(argv[0], "migration_threshold")) { - if (kstrtoul(argv[1], 10, &tmp)) + if (!strcasecmp(key, "migration_threshold")) { + if (kstrtoul(value, 10, &tmp)) return -EINVAL; cache->migration_threshold = tmp; @@ -1828,7 +1828,20 @@ static int process_config_option(struct cache *cache, char **argv) return NOT_CORE_OPTION; } -static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) +static int set_config_value(struct cache *cache, const char *key, const char *value) +{ + int r = process_config_option(cache, key, value); + + if (r == NOT_CORE_OPTION) + r = policy_set_config_value(cache->policy, key, value); + + if (r) + DMWARN("bad config value for %s: %s", key, value); + + return r; +} + +static int set_config_values(struct cache *cache, int argc, const char **argv) { int r = 0; @@ -1838,12 +1851,9 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a } while (argc) { - r = policy_set_config_value(p, argv[0], argv[1]); - if (r) { - DMWARN("policy_set_config_value failed: key = '%s', value = '%s'", - argv[0], argv[1]); - return r; - } + r = set_config_value(cache, argv[0], argv[1]); + if (r) + break; argc -= 2; argv += 2; @@ -1855,8 +1865,6 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a static int create_cache_policy(struct cache *cache, struct cache_args *ca, char **error) { - int r; - cache->policy = dm_cache_policy_create(ca->policy_name, cache->cache_size, cache->origin_sectors, @@ -1866,14 +1874,7 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, return -ENOMEM; } - r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); - if (r) { - *error = "Error setting cache policy's config values"; - dm_cache_policy_destroy(cache->policy); - cache->policy = NULL; - } - - return r; + return 0; } /* @@ -1967,7 +1968,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) r = create_cache_policy(cache, ca, error); if (r) goto bad; + cache->policy_nr_args = ca->policy_argc; + cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; + + r = set_config_values(cache, ca->policy_argc, ca->policy_argv); + if (r) { + *error = "Error setting cache policy's config values"; + goto bad; + } cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, ca->block_size, may_format, @@ -1986,7 +1995,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); - cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; atomic_set(&cache->nr_migrations, 0); init_waitqueue_head(&cache->migration_wait); @@ -2544,17 +2552,12 @@ err: */ static int cache_message(struct dm_target *ti, unsigned argc, char **argv) { - int r; struct cache *cache = ti->private; if (argc != 2) return -EINVAL; - r = process_config_option(cache, argv); - if (r == NOT_CORE_OPTION) - return policy_set_config_value(cache->policy, argv[0], argv[1]); - - return r; + return set_config_value(cache, argv[0], argv[1]); } static int cache_iterate_devices(struct dm_target *ti, @@ -2612,7 +2615,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, -- cgit v1.2.3