summaryrefslogtreecommitdiff
path: root/drivers/md/dm-bio-prison.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-09 08:10:03 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-09 08:10:03 +0300
commit140dfc9299c33bbfc9350fa061f5ab65cb83df13 (patch)
tree09508691964e277f4835d30f7b9c3962e8cac596 /drivers/md/dm-bio-prison.c
parentf94784bdb114439eb3a5e62343826887bbf3f37c (diff)
parent1a71d6ffe18c0d0f03fc8531949cc8ed41d702ee (diff)
downloadlinux-140dfc9299c33bbfc9350fa061f5ab65cb83df13.tar.xz
Merge tag 'dm-3.19-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Significant DM thin-provisioning performance improvements to meet performance requirements that were requested by the Gluster distributed filesystem. Specifically, dm-thinp now takes care to aggregate IO that will be issued to the same thinp block before issuing IO to the underlying devices. This really helps improve performance on HW RAID6 devices that have a writeback cache because it avoids RMW in the HW RAID controller. - Some stable fixes: fix leak in DM bufio if integrity profiles were enabled, use memzero_explicit in DM crypt to avoid any potential for information leak, and a DM cache fix to properly mark a cache block dirty if it was promoted to the cache via the overwrite optimization. - A few simple DM persistent data library fixes - DM cache multiqueue policy block promotion improvements. - DM cache discard improvements that take advantage of range (multiblock) discard support in the DM bio-prison. This allows for much more efficient bulk discard processing (e.g. when mkfs.xfs discards the entire device). - Some small optimizations in DM core and RCU deference cleanups - DM core changes to suspend/resume code to introduce the new internal suspend/resume interface that the DM thin-pool target now uses to suspend/resume active thin devices when the thin-pool must suspend/resume. This avoids forcing userspace to track all active thin volumes in a thin-pool when the thin-pool is suspended for the purposes of metadata or data space resize. * tag 'dm-3.19-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (49 commits) dm crypt: use memzero_explicit for on-stack buffer dm space map metadata: fix sm_bootstrap_get_count() dm space map metadata: fix sm_bootstrap_get_nr_blocks() dm bufio: fix memleak when using a dm_buffer's inline bio dm cache: fix spurious cell_defer when dealing with partial block at end of device dm cache: dirty flag was mistakenly being cleared when promoting via overwrite dm cache: only use overwrite optimisation for promotion when in writeback mode dm cache: discard block size must be a multiple of cache block size dm cache: fix a harmless race when working out if a block is discarded dm cache: when reloading a discard bitset allow for a different discard block size dm cache: fix some issues with the new discard range support dm array: if resizing the array is a noop set the new root to the old one dm: use rcu_dereference_protected instead of rcu_dereference dm thin: fix pool_io_hints to avoid looking at max_hw_sectors dm thin: suspend/resume active thin devices when reloading thin-pool dm: enhance internal suspend and resume interface dm thin: do not allow thin device activation while pool is suspended dm: add presuspend_undo hook to target_type dm: return earlier from dm_blk_ioctl if target doesn't implement .ioctl dm thin: remove stale 'trim' message in block comment above pool_message ...
Diffstat (limited to 'drivers/md/dm-bio-prison.c')
-rw-r--r--drivers/md/dm-bio-prison.c186
1 files changed, 87 insertions, 99 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index f752d12081ff..be065300e93c 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,68 +14,38 @@
/*----------------------------------------------------------------*/
-struct bucket {
- spinlock_t lock;
- struct hlist_head cells;
-};
+#define MIN_CELLS 1024
struct dm_bio_prison {
+ spinlock_t lock;
mempool_t *cell_pool;
-
- unsigned nr_buckets;
- unsigned hash_mask;
- struct bucket *buckets;
+ struct rb_root cells;
};
-/*----------------------------------------------------------------*/
-
-static uint32_t calc_nr_buckets(unsigned nr_cells)
-{
- uint32_t n = 128;
-
- nr_cells /= 4;
- nr_cells = min(nr_cells, 8192u);
-
- while (n < nr_cells)
- n <<= 1;
-
- return n;
-}
-
static struct kmem_cache *_cell_cache;
-static void init_bucket(struct bucket *b)
-{
- spin_lock_init(&b->lock);
- INIT_HLIST_HEAD(&b->cells);
-}
+/*----------------------------------------------------------------*/
/*
* @nr_cells should be the number of cells you want in use _concurrently_.
* Don't confuse it with the number of distinct keys.
*/
-struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells)
+struct dm_bio_prison *dm_bio_prison_create(void)
{
- unsigned i;
- uint32_t nr_buckets = calc_nr_buckets(nr_cells);
- size_t len = sizeof(struct dm_bio_prison) +
- (sizeof(struct bucket) * nr_buckets);
- struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL);
+ struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
if (!prison)
return NULL;
- prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
+ spin_lock_init(&prison->lock);
+
+ prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
if (!prison->cell_pool) {
kfree(prison);
return NULL;
}
- prison->nr_buckets = nr_buckets;
- prison->hash_mask = nr_buckets - 1;
- prison->buckets = (struct bucket *) (prison + 1);
- for (i = 0; i < nr_buckets; i++)
- init_bucket(prison->buckets + i);
+ prison->cells = RB_ROOT;
return prison;
}
@@ -101,68 +71,73 @@ void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
-static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
+static void __setup_new_cell(struct dm_cell_key *key,
+ struct bio *holder,
+ struct dm_bio_prison_cell *cell)
{
- const unsigned long BIG_PRIME = 4294967291UL;
- uint64_t hash = key->block * BIG_PRIME;
-
- return (uint32_t) (hash & prison->hash_mask);
+ memcpy(&cell->key, key, sizeof(cell->key));
+ cell->holder = holder;
+ bio_list_init(&cell->bios);
}
-static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs)
+static int cmp_keys(struct dm_cell_key *lhs,
+ struct dm_cell_key *rhs)
{
- return (lhs->virtual == rhs->virtual) &&
- (lhs->dev == rhs->dev) &&
- (lhs->block == rhs->block);
-}
+ if (lhs->virtual < rhs->virtual)
+ return -1;
-static struct bucket *get_bucket(struct dm_bio_prison *prison,
- struct dm_cell_key *key)
-{
- return prison->buckets + hash_key(prison, key);
-}
+ if (lhs->virtual > rhs->virtual)
+ return 1;
-static struct dm_bio_prison_cell *__search_bucket(struct bucket *b,
- struct dm_cell_key *key)
-{
- struct dm_bio_prison_cell *cell;
+ if (lhs->dev < rhs->dev)
+ return -1;
- hlist_for_each_entry(cell, &b->cells, list)
- if (keys_equal(&cell->key, key))
- return cell;
+ if (lhs->dev > rhs->dev)
+ return 1;
- return NULL;
-}
+ if (lhs->block_end <= rhs->block_begin)
+ return -1;
-static void __setup_new_cell(struct bucket *b,
- struct dm_cell_key *key,
- struct bio *holder,
- struct dm_bio_prison_cell *cell)
-{
- memcpy(&cell->key, key, sizeof(cell->key));
- cell->holder = holder;
- bio_list_init(&cell->bios);
- hlist_add_head(&cell->list, &b->cells);
+ if (lhs->block_begin >= rhs->block_end)
+ return 1;
+
+ return 0;
}
-static int __bio_detain(struct bucket *b,
+static int __bio_detain(struct dm_bio_prison *prison,
struct dm_cell_key *key,
struct bio *inmate,
struct dm_bio_prison_cell *cell_prealloc,
struct dm_bio_prison_cell **cell_result)
{
- struct dm_bio_prison_cell *cell;
-
- cell = __search_bucket(b, key);
- if (cell) {
- if (inmate)
- bio_list_add(&cell->bios, inmate);
- *cell_result = cell;
- return 1;
+ int r;
+ struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
+
+ while (*new) {
+ struct dm_bio_prison_cell *cell =
+ container_of(*new, struct dm_bio_prison_cell, node);
+
+ r = cmp_keys(key, &cell->key);
+
+ parent = *new;
+ if (r < 0)
+ new = &((*new)->rb_left);
+ else if (r > 0)
+ new = &((*new)->rb_right);
+ else {
+ if (inmate)
+ bio_list_add(&cell->bios, inmate);
+ *cell_result = cell;
+ return 1;
+ }
}
- __setup_new_cell(b, key, inmate, cell_prealloc);
+ __setup_new_cell(key, inmate, cell_prealloc);
*cell_result = cell_prealloc;
+
+ rb_link_node(&cell_prealloc->node, parent, new);
+ rb_insert_color(&cell_prealloc->node, &prison->cells);
+
return 0;
}
@@ -174,11 +149,10 @@ static int bio_detain(struct dm_bio_prison *prison,
{
int r;
unsigned long flags;
- struct bucket *b = get_bucket(prison, key);
- spin_lock_irqsave(&b->lock, flags);
- r = __bio_detain(b, key, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
+ spin_unlock_irqrestore(&prison->lock, flags);
return r;
}
@@ -205,10 +179,11 @@ EXPORT_SYMBOL_GPL(dm_get_cell);
/*
* @inmates must have been initialised prior to this call
*/
-static void __cell_release(struct dm_bio_prison_cell *cell,
+static void __cell_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
- hlist_del(&cell->list);
+ rb_erase(&cell->node, &prison->cells);
if (inmates) {
if (cell->holder)
@@ -222,21 +197,21 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct bio_list *bios)
{
unsigned long flags;
- struct bucket *b = get_bucket(prison, &cell->key);
- spin_lock_irqsave(&b->lock, flags);
- __cell_release(cell, bios);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_lock_irqsave(&prison->lock, flags);
+ __cell_release(prison, cell, bios);
+ spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
+static void __cell_release_no_holder(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
- hlist_del(&cell->list);
+ rb_erase(&cell->node, &prison->cells);
bio_list_merge(inmates, &cell->bios);
}
@@ -245,11 +220,10 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
struct bio_list *inmates)
{
unsigned long flags;
- struct bucket *b = get_bucket(prison, &cell->key);
- spin_lock_irqsave(&b->lock, flags);
- __cell_release_no_holder(cell, inmates);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_lock_irqsave(&prison->lock, flags);
+ __cell_release_no_holder(prison, cell, inmates);
+ spin_unlock_irqrestore(&prison->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
@@ -267,6 +241,20 @@ void dm_cell_error(struct dm_bio_prison *prison,
}
EXPORT_SYMBOL_GPL(dm_cell_error);
+void dm_cell_visit_release(struct dm_bio_prison *prison,
+ void (*visit_fn)(void *, struct dm_bio_prison_cell *),
+ void *context,
+ struct dm_bio_prison_cell *cell)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&prison->lock, flags);
+ visit_fn(context, cell);
+ rb_erase(&cell->node, &prison->cells);
+ spin_unlock_irqrestore(&prison->lock, flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_visit_release);
+
/*----------------------------------------------------------------*/
#define DEFERRED_SET_SIZE 64