summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorGabriel Krisman Bertazi <krisman@collabora.com>2022-04-27 19:57:10 +0300
committerMike Snitzer <snitzer@kernel.org>2022-05-09 22:39:23 +0300
commitc06dfd124d46df9c482fbd1319b5fe19bcb1a110 (patch)
tree9d0dd2e46565a568e4fad61124138a28f5a76778 /drivers/md
parent567dd8f34560fa221a6343729474536aa7ede4fd (diff)
downloadlinux-c06dfd124d46df9c482fbd1319b5fe19bcb1a110.tar.xz
dm mpath: provide high-resolution timer to HST for bio-based
The precision loss of reading IO start_time with jiffies_to_nsecs instead of using a high resolution timer degrades HST path prediction for BIO-based mpath on high load workloads. Below, I show the utilization percentage of a 10 disk multipath with asymmetrical disk access cost, while being exercised by a randwrite FIO benchmark with high submission queue depth (depth=64). It is possible to see that the HST path selection degrades heavily for high-iops in BIO-mpath, underutilizing the slower paths way beyond expected. This seems to be caused by the start_time truncation, which makes some IO to seem much slower than it actually is. In this scenario ST outperforms HST for bio-mpath, but not for mq-mpath, which already uses ktime_get_ns(). The third column shows utilization with this patch applied. It is easy to see that now HST prediction is much closer to the ideal distribution (calculated considering the real cost of each path). | | ST | HST (orig) | HST(ktime) | Best | | sdd | 0.17 | 0.20 | 0.17 | 0.18 | | sde | 0.17 | 0.20 | 0.17 | 0.18 | | sdf | 0.17 | 0.20 | 0.17 | 0.18 | | sdg | 0.06 | 0.00 | 0.06 | 0.04 | | sdh | 0.03 | 0.00 | 0.03 | 0.02 | | sdi | 0.03 | 0.00 | 0.03 | 0.02 | | sdj | 0.02 | 0.00 | 0.01 | 0.01 | | sdk | 0.02 | 0.00 | 0.01 | 0.01 | | sdl | 0.17 | 0.20 | 0.17 | 0.18 | | sdm | 0.17 | 0.20 | 0.17 | 0.18 | This issue was originally discussed [1] when we first merged HST, and this patch was left as a low hanging fruit to be solved later. Regarding the implementation, as suggested by Mike in that mail thread, in order to avoid the overhead of ktime_get_ns for other selectors, this patch adds a flag for the selector code to request the high-resolution timer. I tested this using the same benchmark used in the original HST submission. Full test and benchmark scripts are available here: https://people.collabora.com/~krisman/HST-BIO-MPATH/ [1] https://lore.kernel.org/lkml/85tv0am9de.fsf@collabora.com/T/ Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> [snitzer: cleaned up various implementation details] Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-mpath.c8
-rw-r--r--drivers/md/dm-path-selector.h15
-rw-r--r--drivers/md/dm-ps-historical-service-time.c1
3 files changed, 23 insertions, 1 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6ed9d2731254..0e325469a252 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -105,6 +105,7 @@ struct multipath {
struct dm_mpath_io {
struct pgpath *pgpath;
size_t nr_bytes;
+ u64 start_time_ns;
};
typedef int (*action_fn) (struct pgpath *pgpath);
@@ -295,6 +296,7 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp
mpio->nr_bytes = bio->bi_iter.bi_size;
mpio->pgpath = NULL;
+ mpio->start_time_ns = 0;
*mpio_p = mpio;
dm_bio_record(bio_details, bio);
@@ -647,6 +649,9 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
mpio->pgpath = pgpath;
+ if (dm_ps_use_hr_timer(pgpath->pg->ps.type))
+ mpio->start_time_ns = ktime_get_ns();
+
bio->bi_status = 0;
bio_set_dev(bio, pgpath->path.dev->bdev);
bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
@@ -1713,7 +1718,8 @@ done:
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes,
- dm_start_time_ns_from_clone(clone));
+ (mpio->start_time_ns ?:
+ dm_start_time_ns_from_clone(clone)));
}
return r;
diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h
index c47bc0e20275..83cac2b04b66 100644
--- a/drivers/md/dm-path-selector.h
+++ b/drivers/md/dm-path-selector.h
@@ -26,11 +26,26 @@ struct path_selector {
void *context;
};
+/*
+ * If a path selector uses this flag, a high resolution timer is used
+ * (via ktime_get_ns) to account for IO start time in BIO-based mpath.
+ * This improves performance of some path selectors (i.e. HST), in
+ * exchange for slightly higher overhead when submitting the BIO.
+ * The extra cost is usually offset by improved path selection for
+ * some benchmarks.
+ *
+ * This has no effect for request-based mpath, since it already uses a
+ * higher precision timer by default.
+ */
+#define DM_PS_USE_HR_TIMER 0x00000001
+#define dm_ps_use_hr_timer(type) ((type)->features & DM_PS_USE_HR_TIMER)
+
/* Information about a path selector type */
struct path_selector_type {
char *name;
struct module *module;
+ unsigned int features;
unsigned int table_args;
unsigned int info_args;
diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index 82f2a06153dc..1d82c95d323d 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -523,6 +523,7 @@ static int hst_end_io(struct path_selector *ps, struct dm_path *path,
static struct path_selector_type hst_ps = {
.name = "historical-service-time",
.module = THIS_MODULE,
+ .features = DM_PS_USE_HR_TIMER,
.table_args = 1,
.info_args = 3,
.create = hst_create,