summaryrefslogtreecommitdiff
path: root/drivers/md/dm-cache-background-tracker.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 20:31:20 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 20:31:20 +0300
commitd35a878ae1c50977b55e352fd46e36e35add72a0 (patch)
tree7cd4e0ec418c6f3be365e56ee3c49bab218cd608 /drivers/md/dm-cache-background-tracker.c
parente5021876c91dc3894b2174cca8fa797f8e29e7b9 (diff)
parent390020ad2af9ca04844c4f3b1f299ad8746d84c8 (diff)
downloadlinux-d35a878ae1c50977b55e352fd46e36e35add72a0.tar.xz
Merge tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - A major update for DM cache that reduces the latency for deciding whether blocks should migrate to/from the cache. The bio-prison-v2 interface supports this improvement by enabling direct dispatch of work to workqueues rather than having to delay the actual work dispatch to the DM cache core. So the dm-cache policies are much more nimble by being able to drive IO as they see fit. One immediate benefit from the improved latency is a cache that should be much more adaptive to changing workloads. - Add a new DM integrity target that emulates a block device that has additional per-sector tags that can be used for storing integrity information. - Add a new authenticated encryption feature to the DM crypt target that builds on the capabilities provided by the DM integrity target. - Add MD interface for switching the raid4/5/6 journal mode and update the DM raid target to use it to enable raid4/5/6 journal write-back support. - Switch the DM verity target over to using the asynchronous hash crypto API (this helps work better with architectures that have access to off-CPU algorithm providers, which should reduce CPU utilization). - Various request-based DM and DM multipath fixes and improvements from Bart and Christoph. - A DM thinp target fix for a bio structure leak that occurs for each discard IFF discard passdown is enabled. - A fix for a possible deadlock in DM bufio and a fix to re-check the new buffer allocation watermark in the face of competing admin changes to the 'max_cache_size_bytes' tunable. - A couple DM core cleanups.
* tag 'for-4.12/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (50 commits) dm bufio: check new buffer allocation watermark every 30 seconds dm bufio: avoid a possible ABBA deadlock dm mpath: make it easier to detect unintended I/O request flushes dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit() dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH dm: introduce enum dm_queue_mode to cleanup related code dm mpath: verify __pg_init_all_paths locking assumptions at runtime dm: verify suspend_locking assumptions at runtime dm block manager: remove an unused argument from dm_block_manager_create() dm rq: check blk_mq_register_dev() return value in dm_mq_init_request_queue() dm mpath: delay requeuing while path initialization is in progress dm mpath: avoid that path removal can trigger an infinite loop dm mpath: split and rename activate_path() to prepare for its expanded use dm ioctl: prevent stack leak in dm ioctl call dm integrity: use previously calculated log2 of sectors_per_block dm integrity: use hex2bin instead of open-coded variant dm crypt: replace custom implementation of hex2bin() dm crypt: remove obsolete references to per-CPU state dm verity: switch to using asynchronous hash crypto API dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues ...
Diffstat (limited to 'drivers/md/dm-cache-background-tracker.c')
-rw-r--r--drivers/md/dm-cache-background-tracker.c238
1 files changed, 238 insertions, 0 deletions
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
new file mode 100644
index 000000000000..9b1afdfb13f0
--- /dev/null
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2017 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-background-tracker.h"
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "dm-background-tracker"
+
+/*
+ * One item of background work tracked by a background_tracker.  It wraps
+ * the policy_work that callers see together with the linkage needed to
+ * keep it on a list and in the pending rb-tree.
+ */
+struct bt_work {
+ /* Links the item onto the tracker's 'queued' or 'issued' list. */
+ struct list_head list;
+ /* Links the item into the tracker's 'pending' rb-tree (keyed by work.oblock). */
+ struct rb_node node;
+ /* Copy of the caller's work description; pointers to this field are handed out. */
+ struct policy_work work;
+};
+
+/*
+ * Bookkeeping for background work (promotions, demotions, writebacks):
+ * which items exist, which have been handed out, and how many of each
+ * kind are outstanding.
+ */
+struct background_tracker {
+ /* Limit on outstanding work, as passed to btracker_create(). */
+ unsigned max_work;
+ /* Per-operation counts, adjusted by update_stats() on queue and complete. */
+ atomic_t pending_promotes;
+ atomic_t pending_writebacks;
+ atomic_t pending_demotes;
+
+ /* Work whose policy_work pointer has been handed to a caller. */
+ struct list_head issued;
+ /* Work waiting to be handed out by btracker_issue(). */
+ struct list_head queued;
+ /* Every tracked item, indexed by origin block. */
+ struct rb_root pending;
+
+ /* Slab cache that struct bt_work items are allocated from. */
+ struct kmem_cache *work_cache;
+};
+
+/*
+ * Allocate and initialise a background tracker.
+ *
+ * @max_work: limit on outstanding work, stored in the tracker.
+ *
+ * Returns the new tracker, or NULL if either the tracker itself or its
+ * slab cache for work items could not be allocated.  A non-NULL result
+ * must eventually be released with btracker_destroy().
+ */
+struct background_tracker *btracker_create(unsigned max_work)
+{
+	struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
+
+	/* kmalloc() can fail; bail out before touching any field. */
+	if (!b) {
+		DMERR("couldn't create background_tracker");
+		return NULL;
+	}
+
+	b->max_work = max_work;
+	atomic_set(&b->pending_promotes, 0);
+	atomic_set(&b->pending_writebacks, 0);
+	atomic_set(&b->pending_demotes, 0);
+
+	INIT_LIST_HEAD(&b->issued);
+	INIT_LIST_HEAD(&b->queued);
+
+	b->pending = RB_ROOT;
+	b->work_cache = KMEM_CACHE(bt_work, 0);
+	if (!b->work_cache) {
+		DMERR("couldn't create mempool for background work items");
+		kfree(b);
+		b = NULL;
+	}
+
+	return b;
+}
+EXPORT_SYMBOL_GPL(btracker_create);
+
+/*
+ * Tear down a tracker obtained from btracker_create().
+ *
+ * Items still on the 'queued' list were never handed to a caller, so
+ * nobody else can complete them; they are freed here so that the slab
+ * cache is not destroyed while it still contains live objects.  Items on
+ * the 'issued' list belong to whoever received them and are expected to
+ * have been passed to btracker_complete() before this is called.
+ *
+ * Passing NULL is a no-op.
+ */
+void btracker_destroy(struct background_tracker *b)
+{
+	struct bt_work *w, *tmp;
+
+	if (!b)
+		return;
+
+	/* _safe variant: each entry is unlinked and freed while iterating. */
+	list_for_each_entry_safe(w, tmp, &b->queued, list) {
+		list_del(&w->list);
+		kmem_cache_free(b->work_cache, w);
+	}
+
+	kmem_cache_destroy(b->work_cache);
+	kfree(b);
+}
+EXPORT_SYMBOL_GPL(btracker_destroy);
+
+/*
+ * Three-way comparison of two origin blocks by their from_oblock() value:
+ * negative if lhs is smaller, positive if lhs is larger, zero if equal.
+ */
+static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs)
+{
+	if (from_oblock(lhs) == from_oblock(rhs))
+		return 0;
+
+	return from_oblock(lhs) < from_oblock(rhs) ? -1 : 1;
+}
+
+/*
+ * Insert @nw into the tracker's 'pending' rb-tree, keyed by work.oblock.
+ *
+ * The walk goes left when the node being visited has a smaller oblock
+ * than @nw and right when it has a larger one; __find_pending() follows
+ * the same convention.
+ *
+ * Returns true if @nw was linked in, false if an entry for the same
+ * oblock already exists (in which case the tree is left untouched).
+ */
+static bool __insert_pending(struct background_tracker *b,
+			     struct bt_work *nw)
+{
+	struct rb_node **link = &b->pending.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link) {
+		struct bt_work *cur = container_of(*link, struct bt_work, node);
+		int cmp = cmp_oblock(cur->work.oblock, nw->work.oblock);
+
+		if (!cmp)
+			/* already present */
+			return false;
+
+		parent = *link;
+		link = (cmp < 0) ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&nw->node, parent, link);
+	rb_insert_color(&nw->node, &b->pending);
+
+	return true;
+}
+
+/*
+ * Look up the tracked work item for @oblock in the 'pending' rb-tree.
+ *
+ * Uses the same descent direction as __insert_pending().  Returns the
+ * matching item, or NULL if no work is tracked for that block.
+ */
+static struct bt_work *__find_pending(struct background_tracker *b,
+				      dm_oblock_t oblock)
+{
+	struct rb_node *n = b->pending.rb_node;
+
+	while (n) {
+		struct bt_work *candidate = container_of(n, struct bt_work, node);
+		int cmp = cmp_oblock(candidate->work.oblock, oblock);
+
+		if (!cmp)
+			return candidate;
+
+		n = (cmp < 0) ? n->rb_left : n->rb_right;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Add @delta to the pending counter that matches @w->op.  Operations
+ * other than promote, demote and writeback leave every counter untouched.
+ */
+static void update_stats(struct background_tracker *b, struct policy_work *w, int delta)
+{
+	atomic_t *counter;
+
+	if (w->op == POLICY_PROMOTE)
+		counter = &b->pending_promotes;
+	else if (w->op == POLICY_DEMOTE)
+		counter = &b->pending_demotes;
+	else if (w->op == POLICY_WRITEBACK)
+		counter = &b->pending_writebacks;
+	else
+		return;
+
+	atomic_add(delta, counter);
+}
+
+/*
+ * Current value of the tracker's pending-writebacks counter, i.e. the
+ * number of writeback items queued but not yet completed.
+ */
+unsigned btracker_nr_writebacks_queued(struct background_tracker *b)
+{
+	unsigned nr_writebacks = atomic_read(&b->pending_writebacks);
+
+	return nr_writebacks;
+}
+EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
+
+/*
+ * Current value of the tracker's pending-demotes counter, i.e. the
+ * number of demotion items queued but not yet completed.
+ */
+unsigned btracker_nr_demotions_queued(struct background_tracker *b)
+{
+	unsigned nr_demotions = atomic_read(&b->pending_demotes);
+
+	return nr_demotions;
+}
+EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
+
+/*
+ * Report whether the tracker already holds as much work as it is
+ * allowed to.
+ *
+ * Outstanding work is the sum of the pending promote, writeback and
+ * demote counters, which update_stats() raises when work is queued and
+ * lowers when it completes.  Returns true once that sum is at or above
+ * max_work, so a tracker created with max_work == 0 accepts no work.
+ */
+static bool max_work_reached(struct background_tracker *b)
+{
+	unsigned nr_pending = atomic_read(&b->pending_promotes) +
+			      atomic_read(&b->pending_writebacks) +
+			      atomic_read(&b->pending_demotes);
+
+	return nr_pending >= b->max_work;
+}
+
+/*
+ * Start tracking a new piece of background work.
+ *
+ * @work:  description of the work; it is copied, so the caller keeps
+ *         ownership of the original.
+ * @pwork: optional.  When non-NULL the new item goes straight onto the
+ *         'issued' list and *pwork is pointed at the tracker's copy;
+ *         otherwise the item is placed on the 'queued' list.  *pwork is
+ *         set to NULL on every failure path.
+ *
+ * Returns 0 on success, -ENOMEM if max_work_reached() says no more work
+ * may be taken or the item could not be allocated, and -EINVAL if work
+ * for the same oblock is already being tracked.
+ */
+int btracker_queue(struct background_tracker *b,
+		   struct policy_work *work,
+		   struct policy_work **pwork)
+{
+	struct bt_work *item;
+	struct list_head *dest;
+
+	if (pwork)
+		*pwork = NULL;
+
+	if (max_work_reached(b))
+		return -ENOMEM;
+
+	item = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
+	if (!item)
+		return -ENOMEM;
+
+	memcpy(&item->work, work, sizeof(*work));
+
+	if (!__insert_pending(b, item)) {
+		/*
+		 * Work for this oblock is already tracked (we lost a
+		 * race); drop the duplicate.
+		 */
+		kmem_cache_free(b->work_cache, item);
+		return -EINVAL;
+	}
+
+	dest = pwork ? &b->issued : &b->queued;
+	list_add(&item->list, dest);
+	if (pwork)
+		*pwork = &item->work;
+
+	update_stats(b, &item->work, 1);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btracker_queue);
+
+/*
+ * Hand out the first item on the 'queued' list: it is moved to the
+ * 'issued' list and *work is pointed at its policy_work.
+ *
+ * Returns 0 on success, or -ENODATA if nothing is queued (in which case
+ * *work is not written).
+ */
+int btracker_issue(struct background_tracker *b, struct policy_work **work)
+{
+	struct bt_work *next;
+
+	if (!list_empty(&b->queued)) {
+		next = list_first_entry(&b->queued, struct bt_work, list);
+		list_move(&next->list, &b->issued);
+		*work = &next->work;
+
+		return 0;
+	}
+
+	return -ENODATA;
+}
+EXPORT_SYMBOL_GPL(btracker_issue);
+
+/*
+ * Retire a piece of work previously obtained from the tracker.
+ *
+ * @op must point at the policy_work embedded in a tracked bt_work (as
+ * returned through btracker_queue() or btracker_issue()).  The matching
+ * counter is decremented, the item is removed from the pending tree and
+ * from its list, and its memory is returned to the slab cache, so @op
+ * must not be used afterwards.
+ */
+void btracker_complete(struct background_tracker *b,
+		       struct policy_work *op)
+{
+	struct bt_work *done = container_of(op, struct bt_work, work);
+
+	update_stats(b, op, -1);
+	list_del(&done->list);
+	rb_erase(&done->node, &b->pending);
+	kmem_cache_free(b->work_cache, done);
+}
+EXPORT_SYMBOL_GPL(btracker_complete);
+
+/*
+ * Report whether the tracker holds work for @oblock.
+ *
+ * Note that the lookup does not inspect the operation type, so any
+ * tracked work for the block counts, not only promotions.
+ */
+bool btracker_promotion_already_present(struct background_tracker *b,
+					dm_oblock_t oblock)
+{
+	struct bt_work *found = __find_pending(b, oblock);
+
+	return found ? true : false;
+}
+EXPORT_SYMBOL_GPL(btracker_promotion_already_present);
+
+/*----------------------------------------------------------------*/