Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--  drivers/md/dm-thin.c  214
1 file changed, 82 insertions(+), 132 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c33f61a4cc28..6578b7bc1fbb 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>
@@ -268,7 +269,7 @@ struct pool {
process_mapping_fn process_prepared_mapping;
process_mapping_fn process_prepared_discard;
- struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
+ struct dm_bio_prison_cell **cell_sort_array;
};
static enum pool_mode get_pool_mode(struct pool *pool);
@@ -331,9 +332,6 @@ struct thin_c {
*
* Description:
* Asynchronously issue a discard request for the sectors in question.
- * NOTE: this variant of blk-core's blkdev_issue_discard() is a stop-gap
- * that is being kept local to DM thinp until the block changes to allow
- * late bio splitting land upstream.
*/
static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags,
@@ -341,91 +339,36 @@ static int __blkdev_issue_discard_async(struct block_device *bdev, sector_t sect
{
struct request_queue *q = bdev_get_queue(bdev);
int type = REQ_WRITE | REQ_DISCARD;
- unsigned int max_discard_sectors, granularity;
- int alignment;
struct bio *bio;
- int ret = 0;
- struct blk_plug plug;
- if (!q)
+ if (!q || !nr_sects)
return -ENXIO;
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
- /* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
- alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
- /*
- * Ensure that max_discard_sectors is of the proper
- * granularity, so that requests stay aligned after a split.
- */
- max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
- max_discard_sectors -= max_discard_sectors % granularity;
- if (unlikely(!max_discard_sectors)) {
- /* Avoid infinite loop below. Being cautious never hurts. */
- return -EOPNOTSUPP;
- }
-
if (flags & BLKDEV_DISCARD_SECURE) {
if (!blk_queue_secdiscard(q))
return -EOPNOTSUPP;
type |= REQ_SECURE;
}
- blk_start_plug(&plug);
- while (nr_sects) {
- unsigned int req_sects;
- sector_t end_sect, tmp;
-
- /*
- * Required bio_put occurs in bio_endio thanks to bio_chain below
- */
- bio = bio_alloc(gfp_mask, 1);
- if (!bio) {
- ret = -ENOMEM;
- break;
- }
-
- req_sects = min_t(sector_t, nr_sects, max_discard_sectors);
-
- /*
- * If splitting a request, and the next starting sector would be
- * misaligned, stop the discard at the previous aligned sector.
- */
- end_sect = sector + req_sects;
- tmp = end_sect;
- if (req_sects < nr_sects &&
- sector_div(tmp, granularity) != alignment) {
- end_sect = end_sect - alignment;
- sector_div(end_sect, granularity);
- end_sect = end_sect * granularity + alignment;
- req_sects = end_sect - sector;
- }
-
- bio_chain(bio, parent_bio);
-
- bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
+ /*
+ * Required bio_put occurs in bio_endio thanks to bio_chain below
+ */
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ return -ENOMEM;
- bio->bi_iter.bi_size = req_sects << 9;
- nr_sects -= req_sects;
- sector = end_sect;
+ bio_chain(bio, parent_bio);
- submit_bio(type, bio);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_size = nr_sects << 9;
- /*
- * We can loop for a long time in here, if someone does
- * full device discards (like mkfs). Be nice and allow
- * us to schedule out to avoid softlocking if preempt
- * is disabled.
- */
- cond_resched();
- }
- blk_finish_plug(&plug);
+ submit_bio(type, bio);
- return ret;
+ return 0;
}
static bool block_size_is_power_of_two(struct pool *pool)
@@ -614,8 +557,10 @@ static void error_bio_list(struct bio_list *bios, int error)
{
struct bio *bio;
- while ((bio = bio_list_pop(bios)))
- bio_endio(bio, error);
+ while ((bio = bio_list_pop(bios))) {
+ bio->bi_error = error;
+ bio_endio(bio);
+ }
}
static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
@@ -665,16 +610,21 @@ static void requeue_io(struct thin_c *tc)
requeue_deferred_cells(tc);
}
-static void error_retry_list(struct pool *pool)
+static void error_retry_list_with_code(struct pool *pool, int error)
{
struct thin_c *tc;
rcu_read_lock();
list_for_each_entry_rcu(tc, &pool->active_thins, list)
- error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
+ error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
rcu_read_unlock();
}
+static void error_retry_list(struct pool *pool)
+{
+ return error_retry_list_with_code(pool, -EIO);
+}
+
/*
* This section of code contains the logic for processing a thin device's IO.
* Much of the code depends on pool object resources (lists, workqueues, etc)
@@ -864,14 +814,14 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
complete_mapping_preparation(m);
}
-static void overwrite_endio(struct bio *bio, int err)
+static void overwrite_endio(struct bio *bio)
{
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct dm_thin_new_mapping *m = h->overwrite_mapping;
bio->bi_end_io = m->saved_bi_end_io;
- m->err = err;
+ m->err = bio->bi_error;
complete_mapping_preparation(m);
}
@@ -996,7 +946,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
*/
if (bio) {
inc_remap_and_issue_cell(tc, m->cell, m->data_block);
- bio_endio(bio, 0);
+ bio_endio(bio);
} else {
inc_all_io_entry(tc->pool, m->cell->holder);
remap_and_issue(tc, m->cell->holder, m->data_block);
@@ -1026,7 +976,7 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
{
- bio_endio(m->bio, 0);
+ bio_endio(m->bio);
free_discard_mapping(m);
}
@@ -1040,7 +990,7 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
bio_io_error(m->bio);
} else
- bio_endio(m->bio, 0);
+ bio_endio(m->bio);
cell_defer_no_holder(tc, m->cell);
mempool_free(m, tc->pool->mapping_pool);
@@ -1111,7 +1061,8 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
* Even if r is set, there could be sub discards in flight that we
* need to wait for.
*/
- bio_endio(m->bio, r);
+ m->bio->bi_error = r;
+ bio_endio(m->bio);
cell_defer_no_holder(tc, m->cell);
mempool_free(m, pool->mapping_pool);
}
@@ -1487,9 +1438,10 @@ static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
{
int error = should_error_unserviceable_bio(pool);
- if (error)
- bio_endio(bio, error);
- else
+ if (error) {
+ bio->bi_error = error;
+ bio_endio(bio);
+ } else
retry_on_resume(bio);
}
@@ -1533,9 +1485,8 @@ static void process_discard_cell_no_passdown(struct thin_c *tc,
}
/*
- * FIXME: DM local hack to defer parent bios's end_io until we
- * _know_ all chained sub range discard bios have completed.
- * Will go away once late bio splitting lands upstream!
+ * __bio_inc_remaining() is used to defer parent bios's end_io until
+ * we _know_ all chained sub range discard bios have completed.
*/
static inline void __bio_inc_remaining(struct bio *bio)
{
@@ -1625,7 +1576,7 @@ static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_priso
* will prevent completion until the sub range discards have
* completed.
*/
- bio_endio(bio, 0);
+ bio_endio(bio);
}
static void process_discard_bio(struct thin_c *tc, struct bio *bio)
@@ -1639,7 +1590,7 @@ static void process_discard_bio(struct thin_c *tc, struct bio *bio)
/*
* The discard covers less than a block.
*/
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
}
@@ -1784,7 +1735,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
if (bio_data_dir(bio) == READ) {
zero_fill_bio(bio);
cell_defer_no_holder(tc, cell);
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
}
@@ -1849,7 +1800,7 @@ static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
} else {
zero_fill_bio(bio);
- bio_endio(bio, 0);
+ bio_endio(bio);
}
} else
provision_block(tc, bio, block, cell);
@@ -1920,7 +1871,7 @@ static void __process_bio_read_only(struct thin_c *tc, struct bio *bio,
}
zero_fill_bio(bio);
- bio_endio(bio, 0);
+ bio_endio(bio);
break;
default:
@@ -1945,7 +1896,7 @@ static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell
static void process_bio_success(struct thin_c *tc, struct bio *bio)
{
- bio_endio(bio, 0);
+ bio_endio(bio);
}
static void process_bio_fail(struct thin_c *tc, struct bio *bio)
@@ -2281,18 +2232,23 @@ static void do_waker(struct work_struct *ws)
queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}
+static void notify_of_pool_mode_change_to_oods(struct pool *pool);
+
/*
* We're holding onto IO to allow userland time to react. After the
* timeout either the pool will have been resized (and thus back in
- * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
*/
static void do_no_space_timeout(struct work_struct *ws)
{
struct pool *pool = container_of(to_delayed_work(ws), struct pool,
no_space_timeout);
- if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
- set_pool_mode(pool, PM_READ_ONLY);
+ if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
+ pool->pf.error_if_no_space = true;
+ notify_of_pool_mode_change_to_oods(pool);
+ error_retry_list_with_code(pool, -ENOSPC);
+ }
}
/*----------------------------------------------------------------*/
@@ -2370,6 +2326,14 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
dm_device_name(pool->pool_md), new_mode);
}
+static void notify_of_pool_mode_change_to_oods(struct pool *pool)
+{
+ if (!pool->pf.error_if_no_space)
+ notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
+ else
+ notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
+}
+
static bool passdown_enabled(struct pool_c *pt)
{
return pt->adjusted_pf.discard_passdown;
@@ -2454,7 +2418,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
* frequently seeing this mode.
*/
if (old_mode != new_mode)
- notify_of_pool_mode_change(pool, "out-of-data-space");
+ notify_of_pool_mode_change_to_oods(pool);
pool->process_bio = process_bio_read_only;
pool->process_discard = process_discard_bio;
pool->process_cell = process_cell_read_only;
@@ -2581,7 +2545,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
thin_hook_bio(tc, bio);
if (tc->requeue_mode) {
- bio_endio(bio, DM_ENDIO_REQUEUE);
+ bio->bi_error = DM_ENDIO_REQUEUE;
+ bio_endio(bio);
return DM_MAPIO_SUBMITTED;
}
@@ -2777,6 +2742,7 @@ static void __pool_destroy(struct pool *pool)
{
__pool_table_remove(pool);
+ vfree(pool->cell_sort_array);
if (dm_pool_metadata_close(pool->pmd) < 0)
DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
@@ -2889,6 +2855,13 @@ static struct pool *pool_create(struct mapped_device *pool_md,
goto bad_mapping_pool;
}
+ pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
+ if (!pool->cell_sort_array) {
+ *error = "Error allocating cell sort array";
+ err_p = ERR_PTR(-ENOMEM);
+ goto bad_sort_array;
+ }
+
pool->ref_count = 1;
pool->last_commit_jiffies = jiffies;
pool->pool_md = pool_md;
@@ -2897,6 +2870,8 @@ static struct pool *pool_create(struct mapped_device *pool_md,
return pool;
+bad_sort_array:
+ mempool_destroy(pool->mapping_pool);
bad_mapping_pool:
dm_deferred_set_destroy(pool->all_io_ds);
bad_all_io_ds:
@@ -3714,6 +3689,7 @@ static void emit_flags(struct pool_features *pf, char *result,
* Status line is:
* <transaction id> <used metadata sectors>/<total metadata sectors>
* <used data sectors>/<total data sectors> <held metadata root>
+ * <pool mode> <discard config> <no space config> <needs_check>
*/
static void pool_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
@@ -3815,6 +3791,11 @@ static void pool_status(struct dm_target *ti, status_type_t type,
else
DMEMIT("queue_if_no_space ");
+ if (dm_pool_metadata_needs_check(pool->pmd))
+ DMEMIT("needs_check ");
+ else
+ DMEMIT("- ");
+
break;
case STATUSTYPE_TABLE:
@@ -3840,20 +3821,6 @@ static int pool_iterate_devices(struct dm_target *ti,
return fn(ti, pt->data_dev, 0, ti->len, data);
}
-static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
- struct bio_vec *biovec, int max_size)
-{
- struct pool_c *pt = ti->private;
- struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-
- if (!q->merge_bvec_fn)
- return max_size;
-
- bvm->bi_bdev = pt->data_dev->bdev;
-
- return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
-}
-
static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct pool_c *pt = ti->private;
@@ -3918,7 +3885,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
- .version = {1, 15, 0},
+ .version = {1, 16, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@@ -3930,7 +3897,6 @@ static struct target_type pool_target = {
.resume = pool_resume,
.message = pool_message,
.status = pool_status,
- .merge = pool_merge,
.iterate_devices = pool_iterate_devices,
.io_hints = pool_io_hints,
};
@@ -4257,21 +4223,6 @@ err:
DMEMIT("Error");
}
-static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
- struct bio_vec *biovec, int max_size)
-{
- struct thin_c *tc = ti->private;
- struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev);
-
- if (!q->merge_bvec_fn)
- return max_size;
-
- bvm->bi_bdev = tc->pool_dev->bdev;
- bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);
-
- return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
-}
-
static int thin_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
@@ -4305,7 +4256,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 15, 0},
+ .version = {1, 16, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
@@ -4315,7 +4266,6 @@ static struct target_type thin_target = {
.presuspend = thin_presuspend,
.postsuspend = thin_postsuspend,
.status = thin_status,
- .merge = thin_merge,
.iterate_devices = thin_iterate_devices,
.io_hints = thin_io_hints,
};