summaryrefslogtreecommitdiff
path: root/drivers/md/dm-thin.c
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-09-12 19:14:33 +0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-09-12 19:14:33 +0400
commit25a765b7f05cb8460fa01b54568894b20e184862 (patch)
tree0b56db57b4d9f912393ab303c269e0fe6cdf8635 /drivers/md/dm-thin.c
parent9d2be9287107695708e6aae5105a8a518a6cb4d0 (diff)
parent64282278989d5b0398dcb3ba7904cb00c621dc35 (diff)
downloadlinux-25a765b7f05cb8460fa01b54568894b20e184862.tar.xz
Merge branch 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into stable/for-linus-3.7
* 'x86/platform' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (9690 commits) x86: Document x86_init.paging.pagetable_init() x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done() x86: Move paging_init() call to x86_init.paging.pagetable_init() x86: Rename pagetable_setup_start() to pagetable_init() x86: Remove base argument from x86_init.paging.pagetable_setup_start Linux 3.6-rc5 HID: tpkbd: work even if the new Lenovo Keyboard driver is not configured Remove user-triggerable BUG from mpol_to_str xen/pciback: Fix proper FLR steps. uml: fix compile error in deliver_alarm() dj: memory scribble in logi_dj Fix order of arguments to compat_put_time[spec|val] xen: Use correct masking in xen_swiotlb_alloc_coherent. xen: fix logical error in tlb flushing xen/p2m: Fix one-off error in checking the P2M tree directory. powerpc: Don't use __put_user() in patch_instruction powerpc: Make sure IPI handlers see data written by IPI senders powerpc: Restore correct DSCR in context switch powerpc: Fix DSCR inheritance in copy_thread() powerpc: Keep thread.dscr and thread.dscr_inherit in sync ...
Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--drivers/md/dm-thin.c542
1 files changed, 405 insertions, 137 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 68694da0d21d..af1fc3b2c2ad 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1,10 +1,11 @@
/*
- * Copyright (C) 2011 Red Hat UK.
+ * Copyright (C) 2011-2012 Red Hat UK.
*
* This file is released under the GPL.
*/
#include "dm-thin-metadata.h"
+#include "dm.h"
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
@@ -19,7 +20,7 @@
/*
* Tunable constants
*/
-#define ENDIO_HOOK_POOL_SIZE 10240
+#define ENDIO_HOOK_POOL_SIZE 1024
#define DEFERRED_SET_SIZE 64
#define MAPPING_POOL_SIZE 1024
#define PRISON_CELLS 1024
@@ -496,12 +497,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
*/
struct dm_thin_new_mapping;
+/*
+ * The pool runs in 3 modes. Ordered in degraded order for comparisons.
+ */
+enum pool_mode {
+ PM_WRITE, /* metadata may be changed */
+ PM_READ_ONLY, /* metadata may not be changed */
+ PM_FAIL, /* all I/O fails */
+};
+
struct pool_features {
+ enum pool_mode mode;
+
unsigned zero_new_blocks:1;
unsigned discard_enabled:1;
unsigned discard_passdown:1;
};
+struct thin_c;
+typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
+typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
+
struct pool {
struct list_head list;
struct dm_target *ti; /* Only set if a pool target is bound */
@@ -510,10 +526,9 @@ struct pool {
struct block_device *md_dev;
struct dm_pool_metadata *pmd;
- uint32_t sectors_per_block;
- unsigned block_shift;
- dm_block_t offset_mask;
dm_block_t low_water_blocks;
+ uint32_t sectors_per_block;
+ int sectors_per_block_shift;
struct pool_features pf;
unsigned low_water_triggered:1; /* A dm event has been sent */
@@ -526,8 +541,8 @@ struct pool {
struct work_struct worker;
struct delayed_work waker;
- unsigned ref_count;
unsigned long last_commit_jiffies;
+ unsigned ref_count;
spinlock_t lock;
struct bio_list deferred_bios;
@@ -543,8 +558,17 @@ struct pool {
struct dm_thin_new_mapping *next_mapping;
mempool_t *mapping_pool;
mempool_t *endio_hook_pool;
+
+ process_bio_fn process_bio;
+ process_bio_fn process_discard;
+
+ process_mapping_fn process_prepared_mapping;
+ process_mapping_fn process_prepared_discard;
};
+static enum pool_mode get_pool_mode(struct pool *pool);
+static void set_pool_mode(struct pool *pool, enum pool_mode mode);
+
/*
* Target context for a pool.
*/
@@ -679,16 +703,28 @@ static void requeue_io(struct thin_c *tc)
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
- return bio->bi_sector >> tc->pool->block_shift;
+ sector_t block_nr = bio->bi_sector;
+
+ if (tc->pool->sectors_per_block_shift < 0)
+ (void) sector_div(block_nr, tc->pool->sectors_per_block);
+ else
+ block_nr >>= tc->pool->sectors_per_block_shift;
+
+ return block_nr;
}
static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
struct pool *pool = tc->pool;
+ sector_t bi_sector = bio->bi_sector;
bio->bi_bdev = tc->pool_dev->bdev;
- bio->bi_sector = (block << pool->block_shift) +
- (bio->bi_sector & pool->offset_mask);
+ if (tc->pool->sectors_per_block_shift < 0)
+ bio->bi_sector = (block * pool->sectors_per_block) +
+ sector_div(bi_sector, pool->sectors_per_block);
+ else
+ bio->bi_sector = (block << pool->sectors_per_block_shift) |
+ (bi_sector & (pool->sectors_per_block - 1));
}
static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -696,21 +732,39 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
bio->bi_bdev = tc->origin_dev->bdev;
}
+static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
+{
+ return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+ dm_thin_changed_this_transaction(tc->td);
+}
+
static void issue(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
unsigned long flags;
+ if (!bio_triggers_commit(tc, bio)) {
+ generic_make_request(bio);
+ return;
+ }
+
/*
- * Batch together any FUA/FLUSH bios we find and then issue
- * a single commit for them in process_deferred_bios().
+ * Complete bio with an error if earlier I/O caused changes to
+ * the metadata that can't be committed e.g, due to I/O errors
+ * on the metadata device.
*/
- if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
- spin_lock_irqsave(&pool->lock, flags);
- bio_list_add(&pool->deferred_flush_bios, bio);
- spin_unlock_irqrestore(&pool->lock, flags);
- } else
- generic_make_request(bio);
+ if (dm_thin_aborted_changes(tc->td)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ /*
+ * Batch together any bios that trigger commits and then issue a
+ * single commit for them in process_deferred_bios().
+ */
+ spin_lock_irqsave(&pool->lock, flags);
+ bio_list_add(&pool->deferred_flush_bios, bio);
+ spin_unlock_irqrestore(&pool->lock, flags);
}
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
@@ -847,6 +901,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell
wake_worker(pool);
}
+static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
+{
+ if (m->bio)
+ m->bio->bi_end_io = m->saved_bi_end_io;
+ cell_error(m->cell);
+ list_del(&m->list);
+ mempool_free(m, m->tc->pool->mapping_pool);
+}
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
@@ -859,7 +921,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
if (m->err) {
cell_error(m->cell);
- return;
+ goto out;
}
/*
@@ -871,7 +933,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
if (r) {
DMERR("dm_thin_insert_block() failed");
cell_error(m->cell);
- return;
+ goto out;
}
/*
@@ -886,22 +948,25 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
} else
cell_defer(tc, m->cell, m->data_block);
+out:
list_del(&m->list);
mempool_free(m, tc->pool->mapping_pool);
}
-static void process_prepared_discard(struct dm_thin_new_mapping *m)
+static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
{
- int r;
struct thin_c *tc = m->tc;
- r = dm_thin_remove_block(tc->td, m->virt_block);
- if (r)
- DMERR("dm_thin_remove_block() failed");
+ bio_io_error(m->bio);
+ cell_defer_except(tc, m->cell);
+ cell_defer_except(tc, m->cell2);
+ mempool_free(m, tc->pool->mapping_pool);
+}
+
+static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
+{
+ struct thin_c *tc = m->tc;
- /*
- * Pass the discard down to the underlying device?
- */
if (m->pass_discard)
remap_and_issue(tc, m->bio, m->data_block);
else
@@ -912,8 +977,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m)
mempool_free(m, tc->pool->mapping_pool);
}
+static void process_prepared_discard(struct dm_thin_new_mapping *m)
+{
+ int r;
+ struct thin_c *tc = m->tc;
+
+ r = dm_thin_remove_block(tc->td, m->virt_block);
+ if (r)
+ DMERR("dm_thin_remove_block() failed");
+
+ process_prepared_discard_passdown(m);
+}
+
static void process_prepared(struct pool *pool, struct list_head *head,
- void (*fn)(struct dm_thin_new_mapping *))
+ process_mapping_fn *fn)
{
unsigned long flags;
struct list_head maps;
@@ -925,7 +1002,7 @@ static void process_prepared(struct pool *pool, struct list_head *head,
spin_unlock_irqrestore(&pool->lock, flags);
list_for_each_entry_safe(m, tmp, &maps, list)
- fn(m);
+ (*fn)(m);
}
/*
@@ -933,9 +1010,7 @@ static void process_prepared(struct pool *pool, struct list_head *head,
*/
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return !(bio->bi_sector & pool->offset_mask) &&
- (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
-
+ return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT);
}
static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1093,6 +1168,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
}
}
+static int commit(struct pool *pool)
+{
+ int r;
+
+ r = dm_pool_commit_metadata(pool->pmd);
+ if (r)
+ DMERR("commit failed, error = %d", r);
+
+ return r;
+}
+
+/*
+ * A non-zero return indicates read_only or fail_io mode.
+ * Many callers don't care about the return value.
+ */
+static int commit_or_fallback(struct pool *pool)
+{
+ int r;
+
+ if (get_pool_mode(pool) != PM_WRITE)
+ return -EINVAL;
+
+ r = commit(pool);
+ if (r)
+ set_pool_mode(pool, PM_READ_ONLY);
+
+ return r;
+}
+
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
{
int r;
@@ -1121,12 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
* Try to commit to see if that will free up some
* more space.
*/
- r = dm_pool_commit_metadata(pool->pmd);
- if (r) {
- DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
- __func__, r);
- return r;
- }
+ (void) commit_or_fallback(pool);
r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
if (r)
@@ -1218,7 +1317,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
*/
m = get_next_mapping(pool);
m->tc = tc;
- m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown;
+ m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown;
m->virt_block = block;
m->data_block = lookup_result.block;
m->cell = cell;
@@ -1234,15 +1333,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
}
} else {
/*
- * This path is hit if people are ignoring
- * limits->discard_granularity. It ignores any
- * part of the discard that is in a subsequent
- * block.
+ * The DM core makes sure that the discard doesn't span
+ * a block boundary. So we submit the discard of a
+ * partial block appropriately.
*/
- sector_t offset = bio->bi_sector - (block << pool->block_shift);
- unsigned remaining = (pool->sectors_per_block - offset) << 9;
- bio->bi_size = min(bio->bi_size, remaining);
-
cell_release_singleton(cell, bio);
cell_release_singleton(cell2, bio);
if ((!lookup_result.shared) && pool->pf.discard_passdown)
@@ -1310,7 +1404,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
if (bio_detain(pool->prison, &key, bio, &cell))
return;
- if (bio_data_dir(bio) == WRITE)
+ if (bio_data_dir(bio) == WRITE && bio->bi_size)
break_sharing(tc, bio, block, &key, lookup_result, cell);
else {
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
@@ -1362,6 +1456,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
default:
DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
+ set_pool_mode(tc->pool, PM_READ_ONLY);
cell_error(cell);
break;
}
@@ -1419,6 +1514,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
}
}
+static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+{
+ int r;
+ int rw = bio_data_dir(bio);
+ dm_block_t block = get_bio_block(tc, bio);
+ struct dm_thin_lookup_result lookup_result;
+
+ r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+ switch (r) {
+ case 0:
+ if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
+ bio_io_error(bio);
+ else
+ remap_and_issue(tc, bio, lookup_result.block);
+ break;
+
+ case -ENODATA:
+ if (rw != READ) {
+ bio_io_error(bio);
+ break;
+ }
+
+ if (tc->origin_dev) {
+ remap_to_origin_and_issue(tc, bio);
+ break;
+ }
+
+ zero_fill_bio(bio);
+ bio_endio(bio, 0);
+ break;
+
+ default:
+ DMERR("dm_thin_find_block() failed, error = %d", r);
+ bio_io_error(bio);
+ break;
+ }
+}
+
+static void process_bio_fail(struct thin_c *tc, struct bio *bio)
+{
+ bio_io_error(bio);
+}
+
static int need_commit_due_to_time(struct pool *pool)
{
return jiffies < pool->last_commit_jiffies ||
@@ -1430,7 +1568,6 @@ static void process_deferred_bios(struct pool *pool)
unsigned long flags;
struct bio *bio;
struct bio_list bios;
- int r;
bio_list_init(&bios);
@@ -1457,9 +1594,9 @@ static void process_deferred_bios(struct pool *pool)
}
if (bio->bi_rw & REQ_DISCARD)
- process_discard(tc, bio);
+ pool->process_discard(tc, bio);
else
- process_bio(tc, bio);
+ pool->process_bio(tc, bio);
}
/*
@@ -1475,10 +1612,7 @@ static void process_deferred_bios(struct pool *pool)
if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
return;
- r = dm_pool_commit_metadata(pool->pmd);
- if (r) {
- DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
- __func__, r);
+ if (commit_or_fallback(pool)) {
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
@@ -1493,8 +1627,8 @@ static void do_worker(struct work_struct *ws)
{
struct pool *pool = container_of(ws, struct pool, worker);
- process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
- process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
+ process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
+ process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
process_deferred_bios(pool);
}
@@ -1511,6 +1645,52 @@ static void do_waker(struct work_struct *ws)
/*----------------------------------------------------------------*/
+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+ return pool->pf.mode;
+}
+
+static void set_pool_mode(struct pool *pool, enum pool_mode mode)
+{
+ int r;
+
+ pool->pf.mode = mode;
+
+ switch (mode) {
+ case PM_FAIL:
+ DMERR("switching pool to failure mode");
+ pool->process_bio = process_bio_fail;
+ pool->process_discard = process_bio_fail;
+ pool->process_prepared_mapping = process_prepared_mapping_fail;
+ pool->process_prepared_discard = process_prepared_discard_fail;
+ break;
+
+ case PM_READ_ONLY:
+ DMERR("switching pool to read-only mode");
+ r = dm_pool_abort_metadata(pool->pmd);
+ if (r) {
+ DMERR("aborting transaction failed");
+ set_pool_mode(pool, PM_FAIL);
+ } else {
+ dm_pool_metadata_read_only(pool->pmd);
+ pool->process_bio = process_bio_read_only;
+ pool->process_discard = process_discard;
+ pool->process_prepared_mapping = process_prepared_mapping_fail;
+ pool->process_prepared_discard = process_prepared_discard_passdown;
+ }
+ break;
+
+ case PM_WRITE:
+ pool->process_bio = process_bio;
+ pool->process_discard = process_discard;
+ pool->process_prepared_mapping = process_prepared_mapping;
+ pool->process_prepared_discard = process_prepared_discard;
+ break;
+ }
+}
+
+/*----------------------------------------------------------------*/
+
/*
* Mapping functions.
*/
@@ -1556,6 +1736,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
struct dm_thin_lookup_result result;
map_context->ptr = thin_hook_bio(tc, bio);
+
+ if (get_pool_mode(tc->pool) == PM_FAIL) {
+ bio_io_error(bio);
+ return DM_MAPIO_SUBMITTED;
+ }
+
if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
thin_defer_bio(tc, bio);
return DM_MAPIO_SUBMITTED;
@@ -1592,14 +1778,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
break;
case -ENODATA:
+ if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
+ /*
+ * This block isn't provisioned, and we have no way
+ * of doing so. Just error it.
+ */
+ bio_io_error(bio);
+ r = DM_MAPIO_SUBMITTED;
+ break;
+ }
+ /* fall through */
+
+ case -EWOULDBLOCK:
/*
* In future, the failed dm_thin_find_block above could
* provide the hint to load the metadata into cache.
*/
- case -EWOULDBLOCK:
thin_defer_bio(tc, bio);
r = DM_MAPIO_SUBMITTED;
break;
+
+ default:
+ /*
+ * Must always call bio_io_error on failure.
+ * dm_thin_find_block can fail with -EINVAL if the
+ * pool is switched to fail-io mode.
+ */
+ bio_io_error(bio);
+ r = DM_MAPIO_SUBMITTED;
+ break;
}
return r;
@@ -1636,15 +1843,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
{
struct pool_c *pt = ti->private;
+ /*
+ * We want to make sure that degraded pools are never upgraded.
+ */
+ enum pool_mode old_mode = pool->pf.mode;
+ enum pool_mode new_mode = pt->pf.mode;
+
+ if (old_mode > new_mode)
+ new_mode = old_mode;
+
pool->ti = ti;
pool->low_water_blocks = pt->low_water_blocks;
pool->pf = pt->pf;
+ set_pool_mode(pool, new_mode);
/*
* If discard_passdown was enabled verify that the data device
* supports discards. Disable discard_passdown if not; otherwise
* -EOPNOTSUPP will be returned.
*/
+ /* FIXME: pull this out into a sep fn. */
if (pt->pf.discard_passdown) {
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
if (!q || !blk_queue_discard(q)) {
@@ -1670,6 +1888,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
/* Initialize pool features. */
static void pool_features_init(struct pool_features *pf)
{
+ pf->mode = PM_WRITE;
pf->zero_new_blocks = 1;
pf->discard_enabled = 1;
pf->discard_passdown = 1;
@@ -1700,14 +1919,16 @@ static struct kmem_cache *_endio_hook_cache;
static struct pool *pool_create(struct mapped_device *pool_md,
struct block_device *metadata_dev,
- unsigned long block_size, char **error)
+ unsigned long block_size,
+ int read_only, char **error)
{
int r;
void *err_p;
struct pool *pool;
struct dm_pool_metadata *pmd;
+ bool format_device = read_only ? false : true;
- pmd = dm_pool_metadata_open(metadata_dev, block_size);
+ pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
if (IS_ERR(pmd)) {
*error = "Error creating metadata object";
return (struct pool *)pmd;
@@ -1722,8 +1943,10 @@ static struct pool *pool_create(struct mapped_device *pool_md,
pool->pmd = pmd;
pool->sectors_per_block = block_size;
- pool->block_shift = ffs(block_size) - 1;
- pool->offset_mask = block_size - 1;
+ if (block_size & (block_size - 1))
+ pool->sectors_per_block_shift = -1;
+ else
+ pool->sectors_per_block_shift = __ffs(block_size);
pool->low_water_blocks = 0;
pool_features_init(&pool->pf);
pool->prison = prison_create(PRISON_CELLS);
@@ -1822,25 +2045,29 @@ static void __pool_dec(struct pool *pool)
static struct pool *__pool_find(struct mapped_device *pool_md,
struct block_device *metadata_dev,
- unsigned long block_size, char **error,
- int *created)
+ unsigned long block_size, int read_only,
+ char **error, int *created)
{
struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
if (pool) {
- if (pool->pool_md != pool_md)
+ if (pool->pool_md != pool_md) {
+ *error = "metadata device already in use by a pool";
return ERR_PTR(-EBUSY);
+ }
__pool_inc(pool);
} else {
pool = __pool_table_lookup(pool_md);
if (pool) {
- if (pool->md_dev != metadata_dev)
+ if (pool->md_dev != metadata_dev) {
+ *error = "different pool cannot replace a pool";
return ERR_PTR(-EINVAL);
+ }
__pool_inc(pool);
} else {
- pool = pool_create(pool_md, metadata_dev, block_size, error);
+ pool = pool_create(pool_md, metadata_dev, block_size, read_only, error);
*created = 1;
}
}
@@ -1891,19 +2118,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
arg_name = dm_shift_arg(as);
argc--;
- if (!strcasecmp(arg_name, "skip_block_zeroing")) {
+ if (!strcasecmp(arg_name, "skip_block_zeroing"))
pf->zero_new_blocks = 0;
- continue;
- } else if (!strcasecmp(arg_name, "ignore_discard")) {
+
+ else if (!strcasecmp(arg_name, "ignore_discard"))
pf->discard_enabled = 0;
- continue;
- } else if (!strcasecmp(arg_name, "no_discard_passdown")) {
+
+ else if (!strcasecmp(arg_name, "no_discard_passdown"))
pf->discard_passdown = 0;
- continue;
- }
- ti->error = "Unrecognised pool feature requested";
- r = -EINVAL;
+ else if (!strcasecmp(arg_name, "read_only"))
+ pf->mode = PM_READ_ONLY;
+
+ else {
+ ti->error = "Unrecognised pool feature requested";
+ r = -EINVAL;
+ break;
+ }
}
return r;
@@ -1967,7 +2198,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
- !is_power_of_2(block_size)) {
+ block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
ti->error = "Invalid block size";
r = -EINVAL;
goto out;
@@ -1996,7 +2227,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
- block_size, &ti->error, &pool_created);
+ block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created);
if (IS_ERR(pool)) {
r = PTR_ERR(pool);
goto out_free_pt;
@@ -2014,6 +2245,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto out_flags_changed;
}
+ /*
+ * The block layer requires discard_granularity to be a power of 2.
+ */
+ if (pf.discard_enabled && !is_power_of_2(block_size)) {
+ ti->error = "Discard support must be disabled when the block size is not a power of 2";
+ r = -EINVAL;
+ goto out_flags_changed;
+ }
+
pt->pool = pool;
pt->ti = ti;
pt->metadata_dev = metadata_dev;
@@ -2033,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
* stacking of discard limits (this keeps the pool and
* thin devices' discard limits consistent).
*/
- ti->discards_supported = 1;
+ ti->discards_supported = true;
}
ti->private = pt;
@@ -2093,7 +2333,8 @@ static int pool_preresume(struct dm_target *ti)
int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- dm_block_t data_size, sb_data_size;
+ sector_t data_size = ti->len;
+ dm_block_t sb_data_size;
/*
* Take control of the pool object.
@@ -2102,7 +2343,8 @@ static int pool_preresume(struct dm_target *ti)
if (r)
return r;
- data_size = ti->len >> pool->block_shift;
+ (void) sector_div(data_size, pool->sectors_per_block);
+
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
@@ -2111,22 +2353,19 @@ static int pool_preresume(struct dm_target *ti)
if (data_size < sb_data_size) {
DMERR("pool target too small, is %llu blocks (expected %llu)",
- data_size, sb_data_size);
+ (unsigned long long)data_size, sb_data_size);
return -EINVAL;
} else if (data_size > sb_data_size) {
r = dm_pool_resize_data_dev(pool->pmd, data_size);
if (r) {
DMERR("failed to resize data device");
+ /* FIXME Stricter than necessary: Rollback transaction instead here */
+ set_pool_mode(pool, PM_READ_ONLY);
return r;
}
- r = dm_pool_commit_metadata(pool->pmd);
- if (r) {
- DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
- __func__, r);
- return r;
- }
+ (void) commit_or_fallback(pool);
}
return 0;
@@ -2149,19 +2388,12 @@ static void pool_resume(struct dm_target *ti)
static void pool_postsuspend(struct dm_target *ti)
{
- int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
cancel_delayed_work(&pool->waker);
flush_workqueue(pool->wq);
-
- r = dm_pool_commit_metadata(pool->pmd);
- if (r < 0) {
- DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
- __func__, r);
- /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/
- }
+ (void) commit_or_fallback(pool);
}
static int check_arg_count(unsigned argc, unsigned args_required)
@@ -2295,12 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
if (r)
return r;
- r = dm_pool_commit_metadata(pool->pmd);
- if (r) {
- DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
- __func__, r);
- return r;
- }
+ (void) commit_or_fallback(pool);
r = dm_pool_reserve_metadata_snap(pool->pmd);
if (r)
@@ -2361,25 +2588,41 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
else
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
- if (!r) {
- r = dm_pool_commit_metadata(pool->pmd);
- if (r)
- DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
- argv[0], r);
- }
+ if (!r)
+ (void) commit_or_fallback(pool);
return r;
}
+static void emit_flags(struct pool_features *pf, char *result,
+ unsigned sz, unsigned maxlen)
+{
+ unsigned count = !pf->zero_new_blocks + !pf->discard_enabled +
+ !pf->discard_passdown + (pf->mode == PM_READ_ONLY);
+ DMEMIT("%u ", count);
+
+ if (!pf->zero_new_blocks)
+ DMEMIT("skip_block_zeroing ");
+
+ if (!pf->discard_enabled)
+ DMEMIT("ignore_discard ");
+
+ if (!pf->discard_passdown)
+ DMEMIT("no_discard_passdown ");
+
+ if (pf->mode == PM_READ_ONLY)
+ DMEMIT("read_only ");
+}
+
/*
* Status line is:
* <transaction id> <used metadata sectors>/<total metadata sectors>
* <used data sectors>/<total data sectors> <held metadata root>
*/
static int pool_status(struct dm_target *ti, status_type_t type,
- char *result, unsigned maxlen)
+ unsigned status_flags, char *result, unsigned maxlen)
{
- int r, count;
+ int r;
unsigned sz = 0;
uint64_t transaction_id;
dm_block_t nr_free_blocks_data;
@@ -2394,6 +2637,15 @@ static int pool_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
+ if (get_pool_mode(pool) == PM_FAIL) {
+ DMEMIT("Fail");
+ break;
+ }
+
+ /* Commit to ensure statistics aren't out-of-date */
+ if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
+ (void) commit_or_fallback(pool);
+
r = dm_pool_get_metadata_transaction_id(pool->pmd,
&transaction_id);
if (r)
@@ -2429,9 +2681,19 @@ static int pool_status(struct dm_target *ti, status_type_t type,
(unsigned long long)nr_blocks_data);
if (held_root)
- DMEMIT("%llu", held_root);
+ DMEMIT("%llu ", held_root);
+ else
+ DMEMIT("- ");
+
+ if (pool->pf.mode == PM_READ_ONLY)
+ DMEMIT("ro ");
+ else
+ DMEMIT("rw ");
+
+ if (pool->pf.discard_enabled && pool->pf.discard_passdown)
+ DMEMIT("discard_passdown");
else
- DMEMIT("-");
+ DMEMIT("no_discard_passdown");
break;
@@ -2441,20 +2703,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
(unsigned long)pool->sectors_per_block,
(unsigned long long)pt->low_water_blocks);
-
- count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled +
- !pt->pf.discard_passdown;
- DMEMIT("%u ", count);
-
- if (!pool->pf.zero_new_blocks)
- DMEMIT("skip_block_zeroing ");
-
- if (!pool->pf.discard_enabled)
- DMEMIT("ignore_discard ");
-
- if (!pt->pf.discard_passdown)
- DMEMIT("no_discard_passdown ");
-
+ emit_flags(&pt->pf, result, sz, maxlen);
break;
}
@@ -2492,7 +2741,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
/*
* This is just a hint, and not enforced. We have to cope with
- * bios that overlap 2 blocks.
+ * bios that cover a block partially. A discard that spans a block
+ * boundary is not sent to this target.
*/
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
limits->discard_zeroes_data = pool->pf.zero_new_blocks;
@@ -2513,7 +2763,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -2618,20 +2868,31 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
__pool_inc(tc->pool);
+ if (get_pool_mode(tc->pool) == PM_FAIL) {
+ ti->error = "Couldn't open thin device, Pool is in fail mode";
+ goto bad_thin_open;
+ }
+
r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
if (r) {
ti->error = "Couldn't open thin internal device";
goto bad_thin_open;
}
- ti->split_io = tc->pool->sectors_per_block;
+ r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
+ if (r)
+ goto bad_thin_open;
+
ti->num_flush_requests = 1;
+ ti->flush_supported = true;
/* In case the pool supports discards, pass them on. */
if (tc->pool->pf.discard_enabled) {
- ti->discards_supported = 1;
+ ti->discards_supported = true;
ti->num_discard_requests = 1;
- ti->discard_zeroes_data_unsupported = 1;
+ ti->discard_zeroes_data_unsupported = true;
+ /* Discard requests must be split on a block boundary */
+ ti->split_discard_requests = true;
}
dm_put(pool_md);
@@ -2712,7 +2973,7 @@ static void thin_postsuspend(struct dm_target *ti)
* <nr mapped sectors> <highest mapped sector>
*/
static int thin_status(struct dm_target *ti, status_type_t type,
- char *result, unsigned maxlen)
+ unsigned status_flags, char *result, unsigned maxlen)
{
int r;
ssize_t sz = 0;
@@ -2720,6 +2981,11 @@ static int thin_status(struct dm_target *ti, status_type_t type,
char buf[BDEVNAME_SIZE];
struct thin_c *tc = ti->private;
+ if (get_pool_mode(tc->pool) == PM_FAIL) {
+ DMEMIT("Fail");
+ return 0;
+ }
+
if (!tc->td)
DMEMIT("-");
else {
@@ -2757,19 +3023,21 @@ static int thin_status(struct dm_target *ti, status_type_t type,
static int thin_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
- dm_block_t blocks;
+ sector_t blocks;
struct thin_c *tc = ti->private;
+ struct pool *pool = tc->pool;
/*
* We can't call dm_pool_get_data_dev_size() since that blocks. So
* we follow a more convoluted path through to the pool's target.
*/
- if (!tc->pool->ti)
+ if (!pool->ti)
return 0; /* nothing is bound */
- blocks = tc->pool->ti->len >> tc->pool->block_shift;
+ blocks = pool->ti->len;
+ (void) sector_div(blocks, pool->sectors_per_block);
if (blocks)
- return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+ return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
return 0;
}
@@ -2786,7 +3054,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 1, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,