Diffstat (limited to 'block')
-rw-r--r--   block/bfq-iosched.c        |   5
-rw-r--r--   block/bfq-wf2q.c           |  13
-rw-r--r--   block/blk-cgroup.c         |   2
-rw-r--r--   block/blk-core.c           |  42
-rw-r--r--   block/blk-mq-debugfs.c     | 870
-rw-r--r--   block/blk-mq-debugfs.h     |  82
-rw-r--r--   block/blk-mq-sched.c       |  30
-rw-r--r--   block/blk-mq-sysfs.c       |  10
-rw-r--r--   block/blk-mq.c             |  83
-rw-r--r--   block/blk-mq.h             |  28
-rw-r--r--   block/blk-stat.c           |  17
-rw-r--r--   block/blk-sysfs.c          |   7
-rw-r--r--   block/blk-throttle.c       | 172
-rw-r--r--   block/blk.h                |   2
-rw-r--r--   block/cfq-iosched.c        |  17
-rw-r--r--   block/elevator.c           |  21
-rw-r--r--   block/kyber-iosched.c      | 130
-rw-r--r--   block/mq-deadline.c        | 123
-rw-r--r--   block/partition-generic.c  |  21
-rw-r--r--   block/partitions/msdos.c   |   2
20 files changed, 972 insertions, 705 deletions
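
The centerpiece of this series is the rework of blk-mq debugfs support: instead of one file_operations instance per attribute, every debugfs file is now described by a struct blk_mq_debugfs_attr (declared in the new block/blk-mq-debugfs.h) and served by a single blk_mq_debugfs_fops. A minimal sketch of the pattern, lifted from the hunks below (kernel-internal code, shown here only to illustrate the shape, not runnable on its own):

    /* New descriptor from block/blk-mq-debugfs.h: one entry per debugfs file. */
    struct blk_mq_debugfs_attr {
    	const char *name;
    	umode_t mode;
    	int (*show)(void *, struct seq_file *);
    	ssize_t (*write)(void *, const char __user *, size_t, loff_t *);
    	/* Set either .show or .seq_ops. */
    	const struct seq_operations *seq_ops;
    };

    /* Attributes become sentinel-terminated tables, all sharing one fops. */
    static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
    	{"poll_stat", 0400, queue_poll_stat_show},
    	{"state", 0600, queue_state_show, queue_state_write},
    	{},
    };

debugfs_create_files() registers each table entry with blk_mq_debugfs_fops, whose open/write handlers recover the attribute from inode->i_private and the owning object (queue, hctx or ctx) from the parent directory's i_private, as shown in the blk-mq-debugfs.c hunks that follow.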
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index bd8499ef157c..08ce45096350 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -56,6 +56,11 @@ * rotational or flash-based devices, and to get the job done quickly * for applications consisting in many I/O-bound processes. * + * NOTE: if the main or only goal, with a given device, is to achieve + * the maximum-possible throughput at all times, then do switch off + * all low-latency heuristics for that device, by setting low_latency + * to 0. + * * BFQ is described in [1], where also a reference to the initial, more * theoretical paper on BFQ can be found. The interested reader can find * in the latter paper full details on the main algorithm, as well as diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index b4fc3e4260b7..8726ede19eef 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) { struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - int is_in_service = entity == sd->in_service_entity; + struct bfq_service_tree *st; + bool is_in_service; if (!entity->on_st) /* entity never activated, or already inactive */ return false; + /* + * If we get here, then entity is active, which implies that + * bfq_group_set_parent has already been invoked for the group + * represented by entity. Therefore, the field + * entity->sched_data has been set, and we can safely use it. + */ + st = bfq_entity_service_tree(entity); + is_in_service = entity == sd->in_service_entity; + if (is_in_service) bfq_calc_finish(entity, entity->service); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7c2947128f58..0480892e97e5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->blkcg != &blkcg_root) - blk_exit_rl(&blkg->rl); + blk_exit_rl(blkg->q, &blkg->rl); blkg_rwstat_exit(&blkg->stat_ios); blkg_rwstat_exit(&blkg->stat_bytes); diff --git a/block/blk-core.c b/block/blk-core.c index 24886b69690f..a7421b772d0e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -561,13 +561,9 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. 
*/ blk_freeze_queue(q); - if (!q->mq_ops) { - spin_lock_irq(lock); + spin_lock_irq(lock); + if (!q->mq_ops) __blk_drain_queue(q, true); - } else { - blk_mq_debugfs_unregister_mq(q); - spin_lock_irq(lock); - } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -652,13 +648,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, if (!rl->rq_pool) return -ENOMEM; + if (rl != &q->root_rl) + WARN_ON_ONCE(!blk_get_queue(q)); + return 0; } -void blk_exit_rl(struct request_list *rl) +void blk_exit_rl(struct request_queue *q, struct request_list *rl) { - if (rl->rq_pool) + if (rl->rq_pool) { mempool_destroy(rl->rq_pool); + if (rl != &q->root_rl) + blk_put_queue(q); + } } struct request_queue *blk_alloc_queue(gfp_t gfp_mask) @@ -2648,8 +2650,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) return false; } - WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD); - req->__data_len -= total_bytes; /* update sector only for requests with clear definition of sector */ @@ -2662,17 +2662,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; } - /* - * If total number of sectors is less than the first segment - * size, something has gone terribly wrong. - */ - if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { - blk_dump_rq_flags(req, "request botched"); - req->__data_len = blk_rq_cur_bytes(req); - } + if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { + /* + * If total number of sectors is less than the first segment + * size, something has gone terribly wrong. + */ + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { + blk_dump_rq_flags(req, "request botched"); + req->__data_len = blk_rq_cur_bytes(req); + } - /* recalculate the number of segments */ - blk_recalc_rq_segments(req); + /* recalculate the number of segments */ + blk_recalc_rq_segments(req); + } return true; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index bcd2a7d4a3a5..803aed4d7221 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -21,28 +21,9 @@ #include <linux/blk-mq.h> #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" -struct blk_mq_debugfs_attr { - const char *name; - umode_t mode; - const struct file_operations *fops; -}; - -static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, - const struct seq_operations *ops) -{ - struct seq_file *m; - int ret; - - ret = seq_open(file, ops); - if (!ret) { - m = file->private_data; - m->private = inode->i_private; - } - return ret; -} - static int blk_flags_show(struct seq_file *m, const unsigned long flags, const char *const *flag_name, int flag_name_count) { @@ -53,7 +34,7 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, if (!(flags & BIT(i))) continue; if (sep) - seq_puts(m, " "); + seq_puts(m, "|"); sep = true; if (i < flag_name_count && flag_name[i]) seq_puts(m, flag_name[i]); @@ -63,41 +44,43 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { - [QUEUE_FLAG_QUEUED] = "QUEUED", - [QUEUE_FLAG_STOPPED] = "STOPPED", - [QUEUE_FLAG_SYNCFULL] = "SYNCFULL", - [QUEUE_FLAG_ASYNCFULL] = "ASYNCFULL", - [QUEUE_FLAG_DYING] = "DYING", - [QUEUE_FLAG_BYPASS] = "BYPASS", - [QUEUE_FLAG_BIDI] = "BIDI", - [QUEUE_FLAG_NOMERGES] = "NOMERGES", - [QUEUE_FLAG_SAME_COMP] = "SAME_COMP", - [QUEUE_FLAG_FAIL_IO] = "FAIL_IO", - 
[QUEUE_FLAG_STACKABLE] = "STACKABLE", - [QUEUE_FLAG_NONROT] = "NONROT", - [QUEUE_FLAG_IO_STAT] = "IO_STAT", - [QUEUE_FLAG_DISCARD] = "DISCARD", - [QUEUE_FLAG_NOXMERGES] = "NOXMERGES", - [QUEUE_FLAG_ADD_RANDOM] = "ADD_RANDOM", - [QUEUE_FLAG_SECERASE] = "SECERASE", - [QUEUE_FLAG_SAME_FORCE] = "SAME_FORCE", - [QUEUE_FLAG_DEAD] = "DEAD", - [QUEUE_FLAG_INIT_DONE] = "INIT_DONE", - [QUEUE_FLAG_NO_SG_MERGE] = "NO_SG_MERGE", - [QUEUE_FLAG_POLL] = "POLL", - [QUEUE_FLAG_WC] = "WC", - [QUEUE_FLAG_FUA] = "FUA", - [QUEUE_FLAG_FLUSH_NQ] = "FLUSH_NQ", - [QUEUE_FLAG_DAX] = "DAX", - [QUEUE_FLAG_STATS] = "STATS", - [QUEUE_FLAG_POLL_STATS] = "POLL_STATS", - [QUEUE_FLAG_REGISTERED] = "REGISTERED", -}; - -static int blk_queue_flags_show(struct seq_file *m, void *v) -{ - struct request_queue *q = m->private; + QUEUE_FLAG_NAME(QUEUED), + QUEUE_FLAG_NAME(STOPPED), + QUEUE_FLAG_NAME(SYNCFULL), + QUEUE_FLAG_NAME(ASYNCFULL), + QUEUE_FLAG_NAME(DYING), + QUEUE_FLAG_NAME(BYPASS), + QUEUE_FLAG_NAME(BIDI), + QUEUE_FLAG_NAME(NOMERGES), + QUEUE_FLAG_NAME(SAME_COMP), + QUEUE_FLAG_NAME(FAIL_IO), + QUEUE_FLAG_NAME(STACKABLE), + QUEUE_FLAG_NAME(NONROT), + QUEUE_FLAG_NAME(IO_STAT), + QUEUE_FLAG_NAME(DISCARD), + QUEUE_FLAG_NAME(NOXMERGES), + QUEUE_FLAG_NAME(ADD_RANDOM), + QUEUE_FLAG_NAME(SECERASE), + QUEUE_FLAG_NAME(SAME_FORCE), + QUEUE_FLAG_NAME(DEAD), + QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(NO_SG_MERGE), + QUEUE_FLAG_NAME(POLL), + QUEUE_FLAG_NAME(WC), + QUEUE_FLAG_NAME(FUA), + QUEUE_FLAG_NAME(FLUSH_NQ), + QUEUE_FLAG_NAME(DAX), + QUEUE_FLAG_NAME(STATS), + QUEUE_FLAG_NAME(POLL_STATS), + QUEUE_FLAG_NAME(REGISTERED), +}; +#undef QUEUE_FLAG_NAME + +static int queue_state_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; blk_flags_show(m, q->queue_flags, blk_queue_flag_name, ARRAY_SIZE(blk_queue_flag_name)); @@ -105,42 +88,41 @@ static int blk_queue_flags_show(struct seq_file *m, void *v) return 0; } -static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp) +static ssize_t queue_state_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - struct request_queue *q = file_inode(file)->i_private; - char op[16] = { }, *s; + struct request_queue *q = data; + char opbuf[16] = { }, *op; + + /* + * The "state" attribute is removed after blk_cleanup_queue() has called + * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid + * triggering a use-after-free. + */ + if (blk_queue_dead(q)) + return -ENOENT; - len = min(len, sizeof(op) - 1); - if (copy_from_user(op, ubuf, len)) + if (count >= sizeof(opbuf)) { + pr_err("%s: operation too long\n", __func__); + goto inval; + } + + if (copy_from_user(opbuf, buf, count)) return -EFAULT; - s = op; - strsep(&s, " \t\n"); /* strip trailing whitespace */ + op = strstrip(opbuf); if (strcmp(op, "run") == 0) { blk_mq_run_hw_queues(q, true); } else if (strcmp(op, "start") == 0) { blk_mq_start_stopped_hw_queues(q, true); } else { - pr_err("%s: unsupported operation %s. 
Use either 'run' or 'start'\n", - __func__, op); + pr_err("%s: unsupported operation '%s'\n", __func__, op); +inval: + pr_err("%s: use either 'run' or 'start'\n", __func__); return -EINVAL; } - return len; -} - -static int blk_queue_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, blk_queue_flags_show, inode->i_private); + return count; } -static const struct file_operations blk_queue_flags_fops = { - .open = blk_queue_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = blk_queue_flags_store, -}; - static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) { if (stat->nr_samples) { @@ -151,9 +133,9 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) } } -static int queue_poll_stat_show(struct seq_file *m, void *v) +static int queue_poll_stat_show(void *data, struct seq_file *m) { - struct request_queue *q = m->private; + struct request_queue *q = data; int bucket; for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { @@ -168,28 +150,19 @@ static int queue_poll_stat_show(struct seq_file *m, void *v) return 0; } -static int queue_poll_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, queue_poll_stat_show, inode->i_private); -} - -static const struct file_operations queue_poll_stat_fops = { - .open = queue_poll_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name static const char *const hctx_state_name[] = { - [BLK_MQ_S_STOPPED] = "STOPPED", - [BLK_MQ_S_TAG_ACTIVE] = "TAG_ACTIVE", - [BLK_MQ_S_SCHED_RESTART] = "SCHED_RESTART", - [BLK_MQ_S_TAG_WAITING] = "TAG_WAITING", - + HCTX_STATE_NAME(STOPPED), + HCTX_STATE_NAME(TAG_ACTIVE), + HCTX_STATE_NAME(SCHED_RESTART), + HCTX_STATE_NAME(TAG_WAITING), + HCTX_STATE_NAME(START_ON_RUN), }; -static int hctx_state_show(struct seq_file *m, void *v) +#undef HCTX_STATE_NAME + +static int hctx_state_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; blk_flags_show(m, hctx->state, hctx_state_name, ARRAY_SIZE(hctx_state_name)); @@ -197,34 +170,26 @@ static int hctx_state_show(struct seq_file *m, void *v) return 0; } -static int hctx_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_state_show, inode->i_private); -} - -static const struct file_operations hctx_state_fops = { - .open = hctx_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name static const char *const alloc_policy_name[] = { - [BLK_TAG_ALLOC_FIFO] = "fifo", - [BLK_TAG_ALLOC_RR] = "rr", + BLK_TAG_ALLOC_NAME(FIFO), + BLK_TAG_ALLOC_NAME(RR), }; +#undef BLK_TAG_ALLOC_NAME +#define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { - [ilog2(BLK_MQ_F_SHOULD_MERGE)] = "SHOULD_MERGE", - [ilog2(BLK_MQ_F_TAG_SHARED)] = "TAG_SHARED", - [ilog2(BLK_MQ_F_SG_MERGE)] = "SG_MERGE", - [ilog2(BLK_MQ_F_BLOCKING)] = "BLOCKING", - [ilog2(BLK_MQ_F_NO_SCHED)] = "NO_SCHED", + HCTX_FLAG_NAME(SHOULD_MERGE), + HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(SG_MERGE), + HCTX_FLAG_NAME(BLOCKING), + HCTX_FLAG_NAME(NO_SCHED), }; +#undef HCTX_FLAG_NAME -static int hctx_flags_show(struct seq_file *m, void *v) +static int hctx_flags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; 
const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags); seq_puts(m, "alloc_policy="); @@ -241,76 +206,69 @@ static int hctx_flags_show(struct seq_file *m, void *v) return 0; } -static int hctx_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_flags_show, inode->i_private); -} - -static const struct file_operations hctx_flags_fops = { - .open = hctx_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name static const char *const op_name[] = { - [REQ_OP_READ] = "READ", - [REQ_OP_WRITE] = "WRITE", - [REQ_OP_FLUSH] = "FLUSH", - [REQ_OP_DISCARD] = "DISCARD", - [REQ_OP_ZONE_REPORT] = "ZONE_REPORT", - [REQ_OP_SECURE_ERASE] = "SECURE_ERASE", - [REQ_OP_ZONE_RESET] = "ZONE_RESET", - [REQ_OP_WRITE_SAME] = "WRITE_SAME", - [REQ_OP_WRITE_ZEROES] = "WRITE_ZEROES", - [REQ_OP_SCSI_IN] = "SCSI_IN", - [REQ_OP_SCSI_OUT] = "SCSI_OUT", - [REQ_OP_DRV_IN] = "DRV_IN", - [REQ_OP_DRV_OUT] = "DRV_OUT", -}; - + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(ZONE_REPORT), + REQ_OP_NAME(SECURE_ERASE), + REQ_OP_NAME(ZONE_RESET), + REQ_OP_NAME(WRITE_SAME), + REQ_OP_NAME(WRITE_ZEROES), + REQ_OP_NAME(SCSI_IN), + REQ_OP_NAME(SCSI_OUT), + REQ_OP_NAME(DRV_IN), + REQ_OP_NAME(DRV_OUT), +}; +#undef REQ_OP_NAME + +#define CMD_FLAG_NAME(name) [__REQ_##name] = #name static const char *const cmd_flag_name[] = { - [__REQ_FAILFAST_DEV] = "FAILFAST_DEV", - [__REQ_FAILFAST_TRANSPORT] = "FAILFAST_TRANSPORT", - [__REQ_FAILFAST_DRIVER] = "FAILFAST_DRIVER", - [__REQ_SYNC] = "SYNC", - [__REQ_META] = "META", - [__REQ_PRIO] = "PRIO", - [__REQ_NOMERGE] = "NOMERGE", - [__REQ_IDLE] = "IDLE", - [__REQ_INTEGRITY] = "INTEGRITY", - [__REQ_FUA] = "FUA", - [__REQ_PREFLUSH] = "PREFLUSH", - [__REQ_RAHEAD] = "RAHEAD", - [__REQ_BACKGROUND] = "BACKGROUND", - [__REQ_NR_BITS] = "NR_BITS", -}; - + CMD_FLAG_NAME(FAILFAST_DEV), + CMD_FLAG_NAME(FAILFAST_TRANSPORT), + CMD_FLAG_NAME(FAILFAST_DRIVER), + CMD_FLAG_NAME(SYNC), + CMD_FLAG_NAME(META), + CMD_FLAG_NAME(PRIO), + CMD_FLAG_NAME(NOMERGE), + CMD_FLAG_NAME(IDLE), + CMD_FLAG_NAME(INTEGRITY), + CMD_FLAG_NAME(FUA), + CMD_FLAG_NAME(PREFLUSH), + CMD_FLAG_NAME(RAHEAD), + CMD_FLAG_NAME(BACKGROUND), + CMD_FLAG_NAME(NOUNMAP), +}; +#undef CMD_FLAG_NAME + +#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { - [ilog2((__force u32)RQF_SORTED)] = "SORTED", - [ilog2((__force u32)RQF_STARTED)] = "STARTED", - [ilog2((__force u32)RQF_QUEUED)] = "QUEUED", - [ilog2((__force u32)RQF_SOFTBARRIER)] = "SOFTBARRIER", - [ilog2((__force u32)RQF_FLUSH_SEQ)] = "FLUSH_SEQ", - [ilog2((__force u32)RQF_MIXED_MERGE)] = "MIXED_MERGE", - [ilog2((__force u32)RQF_MQ_INFLIGHT)] = "MQ_INFLIGHT", - [ilog2((__force u32)RQF_DONTPREP)] = "DONTPREP", - [ilog2((__force u32)RQF_PREEMPT)] = "PREEMPT", - [ilog2((__force u32)RQF_COPY_USER)] = "COPY_USER", - [ilog2((__force u32)RQF_FAILED)] = "FAILED", - [ilog2((__force u32)RQF_QUIET)] = "QUIET", - [ilog2((__force u32)RQF_ELVPRIV)] = "ELVPRIV", - [ilog2((__force u32)RQF_IO_STAT)] = "IO_STAT", - [ilog2((__force u32)RQF_ALLOCED)] = "ALLOCED", - [ilog2((__force u32)RQF_PM)] = "PM", - [ilog2((__force u32)RQF_HASHED)] = "HASHED", - [ilog2((__force u32)RQF_STATS)] = "STATS", - [ilog2((__force u32)RQF_SPECIAL_PAYLOAD)] = "SPECIAL_PAYLOAD", -}; - -static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) + RQF_NAME(SORTED), + RQF_NAME(STARTED), + RQF_NAME(QUEUED), + 
RQF_NAME(SOFTBARRIER), + RQF_NAME(FLUSH_SEQ), + RQF_NAME(MIXED_MERGE), + RQF_NAME(MQ_INFLIGHT), + RQF_NAME(DONTPREP), + RQF_NAME(PREEMPT), + RQF_NAME(COPY_USER), + RQF_NAME(FAILED), + RQF_NAME(QUIET), + RQF_NAME(ELVPRIV), + RQF_NAME(IO_STAT), + RQF_NAME(ALLOCED), + RQF_NAME(PM), + RQF_NAME(HASHED), + RQF_NAME(STATS), + RQF_NAME(SPECIAL_PAYLOAD), +}; +#undef RQF_NAME + +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { - struct request *rq = list_entry_rq(v); const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; const unsigned int op = rq->cmd_flags & REQ_OP_MASK; @@ -332,6 +290,13 @@ static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) seq_puts(m, "}\n"); return 0; } +EXPORT_SYMBOL_GPL(__blk_mq_debugfs_rq_show); + +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +{ + return __blk_mq_debugfs_rq_show(m, list_entry_rq(v)); +} +EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) @@ -364,38 +329,14 @@ static const struct seq_operations hctx_dispatch_seq_ops = { .show = blk_mq_debugfs_rq_show, }; -static int hctx_dispatch_open(struct inode *inode, struct file *file) -{ - return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops); -} - -static const struct file_operations hctx_dispatch_fops = { - .open = hctx_dispatch_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int hctx_ctx_map_show(struct seq_file *m, void *v) +static int hctx_ctx_map_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; sbitmap_bitmap_show(&hctx->ctx_map, m); return 0; } -static int hctx_ctx_map_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_ctx_map_show, inode->i_private); -} - -static const struct file_operations hctx_ctx_map_fops = { - .open = hctx_ctx_map_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void blk_mq_debugfs_tags_show(struct seq_file *m, struct blk_mq_tags *tags) { @@ -413,9 +354,9 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, } } -static int hctx_tags_show(struct seq_file *m, void *v) +static int hctx_tags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -430,21 +371,9 @@ out: return res; } -static int hctx_tags_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_tags_show, inode->i_private); -} - -static const struct file_operations hctx_tags_fops = { - .open = hctx_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_tags_bitmap_show(struct seq_file *m, void *v) +static int hctx_tags_bitmap_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -459,21 +388,9 @@ out: return res; } -static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_show(void *data, struct seq_file *m) { - return single_open(file, hctx_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_tags_bitmap_fops = { - .open = hctx_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + 
struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -488,21 +405,9 @@ out: return res; } -static int hctx_sched_tags_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) { - return single_open(file, hctx_sched_tags_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_fops = { - .open = hctx_sched_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -517,21 +422,9 @@ out: return res; } -static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_bitmap_fops = { - .open = hctx_sched_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_io_poll_show(struct seq_file *m, void *v) +static int hctx_io_poll_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "considered=%lu\n", hctx->poll_considered); seq_printf(m, "invoked=%lu\n", hctx->poll_invoked); @@ -539,32 +432,18 @@ static int hctx_io_poll_show(struct seq_file *m, void *v) return 0; } -static int hctx_io_poll_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_io_poll_show, inode->i_private); -} - -static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf, +static ssize_t hctx_io_poll_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; return count; } -static const struct file_operations hctx_io_poll_fops = { - .open = hctx_io_poll_open, - .read = seq_read, - .write = hctx_io_poll_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_dispatched_show(struct seq_file *m, void *v) +static int hctx_dispatched_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]); @@ -579,16 +458,10 @@ static int hctx_dispatched_show(struct seq_file *m, void *v) return 0; } -static int hctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_dispatched_show, inode->i_private); -} - -static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t hctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) @@ -596,96 +469,48 @@ static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations hctx_dispatched_fops = { - .open = hctx_dispatched_open, - .read = seq_read, - .write = hctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_queued_show(struct seq_file *m, void *v) +static int hctx_queued_show(void *data, struct seq_file 
*m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%lu\n", hctx->queued); return 0; } -static int hctx_queued_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_queued_show, inode->i_private); -} - -static ssize_t hctx_queued_write(struct file *file, const char __user *buf, +static ssize_t hctx_queued_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->queued = 0; return count; } -static const struct file_operations hctx_queued_fops = { - .open = hctx_queued_open, - .read = seq_read, - .write = hctx_queued_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_run_show(struct seq_file *m, void *v) +static int hctx_run_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%lu\n", hctx->run); return 0; } -static int hctx_run_open(struct inode *inode, struct file *file) +static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count, + loff_t *ppos) { - return single_open(file, hctx_run_show, inode->i_private); -} - -static ssize_t hctx_run_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->run = 0; return count; } -static const struct file_operations hctx_run_fops = { - .open = hctx_run_open, - .read = seq_read, - .write = hctx_run_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_active_show(struct seq_file *m, void *v) +static int hctx_active_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); return 0; } -static int hctx_active_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_active_show, inode->i_private); -} - -static const struct file_operations hctx_active_fops = { - .open = hctx_active_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) __acquires(&ctx->lock) { @@ -716,156 +541,192 @@ static const struct seq_operations ctx_rq_list_seq_ops = { .stop = ctx_rq_list_stop, .show = blk_mq_debugfs_rq_show, }; - -static int ctx_rq_list_open(struct inode *inode, struct file *file) +static int ctx_dispatched_show(void *data, struct seq_file *m) { - return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops); -} - -static const struct file_operations ctx_rq_list_fops = { - .open = ctx_rq_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int ctx_dispatched_show(struct seq_file *m, void *v) -{ - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); return 0; } -static int ctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, ctx_dispatched_show, inode->i_private); -} - -static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t ctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx 
*ctx = data; ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; return count; } -static const struct file_operations ctx_dispatched_fops = { - .open = ctx_dispatched_open, - .read = seq_read, - .write = ctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_merged_show(struct seq_file *m, void *v) +static int ctx_merged_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu\n", ctx->rq_merged); return 0; } -static int ctx_merged_open(struct inode *inode, struct file *file) +static ssize_t ctx_merged_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - return single_open(file, ctx_merged_show, inode->i_private); -} - -static ssize_t ctx_merged_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; ctx->rq_merged = 0; return count; } -static const struct file_operations ctx_merged_fops = { - .open = ctx_merged_open, - .read = seq_read, - .write = ctx_merged_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_completed_show(struct seq_file *m, void *v) +static int ctx_completed_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); return 0; } -static int ctx_completed_open(struct inode *inode, struct file *file) +static ssize_t ctx_completed_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - return single_open(file, ctx_completed_show, inode->i_private); + struct blk_mq_ctx *ctx = data; + + ctx->rq_completed[0] = ctx->rq_completed[1] = 0; + return count; } -static ssize_t ctx_completed_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static int blk_mq_debugfs_show(struct seq_file *m, void *v) +{ + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(m->file->f_path.dentry->d_parent)->i_private; + + return attr->show(data, m); +} + +static ssize_t blk_mq_debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; - ctx->rq_completed[0] = ctx->rq_completed[1] = 0; - return count; + if (!attr->write) + return -EPERM; + + return attr->write(data, buf, count, ppos); +} + +static int blk_mq_debugfs_open(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; + struct seq_file *m; + int ret; + + if (attr->seq_ops) { + ret = seq_open(file, attr->seq_ops); + if (!ret) { + m = file->private_data; + m->private = data; + } + return ret; + } + + if (WARN_ON_ONCE(!attr->show)) + return -EPERM; + + return single_open(file, blk_mq_debugfs_show, inode->i_private); +} + +static int blk_mq_debugfs_release(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + + if (attr->show) + return single_release(inode, file); + else + return seq_release(inode, file); } -static const struct file_operations ctx_completed_fops = { - .open = ctx_completed_open, +const struct file_operations blk_mq_debugfs_fops = { + .open = 
blk_mq_debugfs_open, .read = seq_read, - .write = ctx_completed_write, + .write = blk_mq_debugfs_write, .llseek = seq_lseek, - .release = single_release, + .release = blk_mq_debugfs_release, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { - {"poll_stat", 0400, &queue_poll_stat_fops}, - {"state", 0600, &blk_queue_flags_fops}, + {"poll_stat", 0400, queue_poll_stat_show}, + {"state", 0600, queue_state_show, queue_state_write}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { - {"state", 0400, &hctx_state_fops}, - {"flags", 0400, &hctx_flags_fops}, - {"dispatch", 0400, &hctx_dispatch_fops}, - {"ctx_map", 0400, &hctx_ctx_map_fops}, - {"tags", 0400, &hctx_tags_fops}, - {"tags_bitmap", 0400, &hctx_tags_bitmap_fops}, - {"sched_tags", 0400, &hctx_sched_tags_fops}, - {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops}, - {"io_poll", 0600, &hctx_io_poll_fops}, - {"dispatched", 0600, &hctx_dispatched_fops}, - {"queued", 0600, &hctx_queued_fops}, - {"run", 0600, &hctx_run_fops}, - {"active", 0400, &hctx_active_fops}, + {"state", 0400, hctx_state_show}, + {"flags", 0400, hctx_flags_show}, + {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"ctx_map", 0400, hctx_ctx_map_show}, + {"tags", 0400, hctx_tags_show}, + {"tags_bitmap", 0400, hctx_tags_bitmap_show}, + {"sched_tags", 0400, hctx_sched_tags_show}, + {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, + {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, + {"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write}, + {"queued", 0600, hctx_queued_show, hctx_queued_write}, + {"run", 0600, hctx_run_show, hctx_run_write}, + {"active", 0400, hctx_active_show}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { - {"rq_list", 0400, &ctx_rq_list_fops}, - {"dispatched", 0600, &ctx_dispatched_fops}, - {"merged", 0600, &ctx_merged_fops}, - {"completed", 0600, &ctx_completed_fops}, + {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops}, + {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write}, + {"merged", 0600, ctx_merged_show, ctx_merged_write}, + {"completed", 0600, ctx_completed_show, ctx_completed_write}, {}, }; +static bool debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr) +{ + d_inode(parent)->i_private = data; + + for (; attr->name; attr++) { + if (!debugfs_create_file(attr->name, attr->mode, parent, + (void *)attr, &blk_mq_debugfs_fops)) + return false; + } + return true; +} + int blk_mq_debugfs_register(struct request_queue *q) { + struct blk_mq_hw_ctx *hctx; + int i; + if (!blk_debugfs_root) return -ENOENT; q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); if (!q->debugfs_dir) - goto err; + return -ENOMEM; - if (blk_mq_debugfs_register_mq(q)) + if (!debugfs_create_files(q->debugfs_dir, q, + blk_mq_debugfs_queue_attrs)) goto err; + /* + * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir + * didn't exist yet (because we don't know what to name the directory + * until the queue is registered to a gendisk). 
+ */ + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) + goto err; + if (q->elevator && !hctx->sched_debugfs_dir && + blk_mq_debugfs_register_sched_hctx(q, hctx)) + goto err; + } + return 0; err: @@ -876,30 +737,18 @@ err: void blk_mq_debugfs_unregister(struct request_queue *q) { debugfs_remove_recursive(q->debugfs_dir); - q->mq_debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; q->debugfs_dir = NULL; } -static bool debugfs_create_files(struct dentry *parent, void *data, - const struct blk_mq_debugfs_attr *attr) -{ - for (; attr->name; attr++) { - if (!debugfs_create_file(attr->name, attr->mode, parent, - data, attr->fops)) - return false; - } - return true; -} - -static int blk_mq_debugfs_register_ctx(struct request_queue *q, - struct blk_mq_ctx *ctx, - struct dentry *hctx_dir) +static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) { struct dentry *ctx_dir; char name[20]; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); - ctx_dir = debugfs_create_dir(name, hctx_dir); + ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); if (!ctx_dir) return -ENOMEM; @@ -909,59 +758,122 @@ static int blk_mq_debugfs_register_ctx(struct request_queue *q, return 0; } -static int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; - struct dentry *hctx_dir; char name[20]; int i; - snprintf(name, sizeof(name), "%u", hctx->queue_num); - hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); - if (!hctx_dir) - return -ENOMEM; + if (!q->debugfs_dir) + return -ENOENT; - if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs)) + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); + hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); + if (!hctx->debugfs_dir) return -ENOMEM; + if (!debugfs_create_files(hctx->debugfs_dir, hctx, + blk_mq_debugfs_hctx_attrs)) + goto err; + hctx_for_each_ctx(hctx, ctx, i) { - if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) + if (blk_mq_debugfs_register_ctx(hctx, ctx)) + goto err; + } + + return 0; + +err: + blk_mq_debugfs_unregister_hctx(hctx); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) +{ + debugfs_remove_recursive(hctx->debugfs_dir); + hctx->sched_debugfs_dir = NULL; + hctx->debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_debugfs_register_hctx(q, hctx)) return -ENOMEM; } return 0; } -int blk_mq_debugfs_register_mq(struct request_queue *q) +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_debugfs_unregister_hctx(hctx); +} + +int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + struct elevator_type *e = q->elevator->type; + if (!q->debugfs_dir) return -ENOENT; - q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir); - if (!q->mq_debugfs_dir) - goto err; + if (!e->queue_debugfs_attrs) + return 0; - if (!debugfs_create_files(q->mq_debugfs_dir, q, blk_mq_debugfs_queue_attrs)) - goto err; + q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); + if (!q->sched_debugfs_dir) + return -ENOMEM; - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_debugfs_register_hctx(q, hctx)) - goto err; - } + if 
(!debugfs_create_files(q->sched_debugfs_dir, q, + e->queue_debugfs_attrs)) + goto err; return 0; err: - blk_mq_debugfs_unregister_mq(q); + blk_mq_debugfs_unregister_sched(q); return -ENOMEM; } -void blk_mq_debugfs_unregister_mq(struct request_queue *q) +void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ + debugfs_remove_recursive(q->sched_debugfs_dir); + q->sched_debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + struct elevator_type *e = q->elevator->type; + + if (!hctx->debugfs_dir) + return -ENOENT; + + if (!e->hctx_debugfs_attrs) + return 0; + + hctx->sched_debugfs_dir = debugfs_create_dir("sched", + hctx->debugfs_dir); + if (!hctx->sched_debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx, + e->hctx_debugfs_attrs)) + return -ENOMEM; + + return 0; +} + +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { - debugfs_remove_recursive(q->mq_debugfs_dir); - q->mq_debugfs_dir = NULL; + debugfs_remove_recursive(hctx->sched_debugfs_dir); + hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h new file mode 100644 index 000000000000..a182e6f97565 --- /dev/null +++ b/block/blk-mq-debugfs.h @@ -0,0 +1,82 @@ +#ifndef INT_BLK_MQ_DEBUGFS_H +#define INT_BLK_MQ_DEBUGFS_H + +#ifdef CONFIG_BLK_DEBUG_FS + +#include <linux/seq_file.h> + +struct blk_mq_debugfs_attr { + const char *name; + umode_t mode; + int (*show)(void *, struct seq_file *); + ssize_t (*write)(void *, const char __user *, size_t, loff_t *); + /* Set either .show or .seq_ops. */ + const struct seq_operations *seq_ops; +}; + +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); + +int blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); +int blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); + +int blk_mq_debugfs_register_sched(struct request_queue *q); +void blk_mq_debugfs_unregister_sched(struct request_queue *q); +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); +#else +static inline int blk_mq_debugfs_register(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) +{ +} + +static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) +{ +} +#endif + +#endif diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 
8b361e192e8a..1f5b692526ae 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -11,6 +11,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-wbt.h" @@ -82,11 +83,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - /* - * For a reserved tag, allocate a normal request since we might - * have driver dependencies on the value of the internal tag. - */ - if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) { + if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; /* @@ -476,6 +473,8 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, } } + blk_mq_debugfs_register_sched_hctx(q, hctx); + return 0; } @@ -487,6 +486,8 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, if (!e) return; + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { e->type->ops.mq.exit_hctx(hctx, hctx_idx); hctx->sched_data = NULL; @@ -523,8 +524,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err; - if (e->ops.mq.init_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_register_sched(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (e->ops.mq.init_hctx) { ret = e->ops.mq.init_hctx(hctx, i); if (ret) { eq = q->elevator; @@ -533,6 +536,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + blk_mq_debugfs_register_sched_hctx(q, hctx); } return 0; @@ -548,14 +552,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) struct blk_mq_hw_ctx *hctx; unsigned int i; - if (e->type->ops.mq.exit_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { - if (hctx->sched_data) { - e->type->ops.mq.exit_hctx(hctx, i); - hctx->sched_data = NULL; - } + queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { + e->type->ops.mq.exit_hctx(hctx, i); + hctx->sched_data = NULL; } } + blk_mq_debugfs_unregister_sched(q); if (e->type->ops.mq.exit_sched) e->type->ops.mq.exit_sched(e); blk_mq_sched_tags_teardown(q); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ec0afdf765e3..79969c3c234f 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -258,8 +258,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -318,8 +316,6 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_ADD); - blk_mq_debugfs_register(q); - queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) @@ -335,8 +331,6 @@ unreg: while (--i >= 0) blk_mq_unregister_hctx(q->queue_hw_ctx[i]); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -364,8 +358,6 @@ void blk_mq_sysfs_unregister(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_unregister_mq(q); - queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); @@ -382,8 +374,6 @@ int blk_mq_sysfs_register(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_register_mq(q); - queue_for_each_hw_ctx(q, 
hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq.c b/block/blk-mq.c index bf90684a007a..1bcccedcc74f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,6 +31,7 @@ #include <linux/blk-mq.h> #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-stat.h" #include "blk-wbt.h" @@ -41,6 +42,7 @@ static LIST_HEAD(all_q_list); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static int blk_mq_poll_stats_bkt(const struct request *rq) { @@ -166,7 +168,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) unsigned int i; bool rcu = false; - blk_mq_stop_hw_queues(q); + __blk_mq_stop_hw_queues(q, true); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -626,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { @@ -1218,20 +1201,34 @@ bool blk_mq_queue_stopped(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_queue_stopped); -void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) { - cancel_delayed_work_sync(&hctx->run_work); + if (sync) + cancel_delayed_work_sync(&hctx->run_work); + else + cancel_delayed_work(&hctx->run_work); + set_bit(BLK_MQ_S_STOPPED, &hctx->state); } + +void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +{ + __blk_mq_stop_hw_queue(hctx, false); +} EXPORT_SYMBOL(blk_mq_stop_hw_queue); -void blk_mq_stop_hw_queues(struct request_queue *q) +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_stop_hw_queue(hctx); + __blk_mq_stop_hw_queue(hctx, sync); +} + +void blk_mq_stop_hw_queues(struct request_queue *q) +{ + __blk_mq_stop_hw_queues(q, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); @@ -1538,13 +1535,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + blk_queue_split(q, &bio, q->bio_split); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); return BLK_QC_T_NONE; } - blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; @@ -1655,8 +1652,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (!rq) continue; - set->ops->exit_request(set->driver_data, rq, - hctx_idx, i); + set->ops->exit_request(set, rq, hctx_idx); tags->static_rqs[i] = NULL; } } @@ -1787,8 +1783,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, tags->static_rqs[i] = rq; if (set->ops->init_request) { - if (set->ops->init_request(set->driver_data, - rq, hctx_idx, i, + if 
(set->ops->init_request(set, rq, hctx_idx, node)) { tags->static_rqs[i] = NULL; goto fail; @@ -1849,14 +1844,12 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - unsigned flush_start_tag = set->queue_depth; + blk_mq_debugfs_unregister_hctx(hctx); blk_mq_tag_idle(hctx); if (set->ops->exit_request) - set->ops->exit_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx); + set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); blk_mq_sched_exit_hctx(q, hctx, hctx_idx); @@ -1889,7 +1882,6 @@ static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { int node; - unsigned flush_start_tag = set->queue_depth; node = hctx->numa_node; if (node == NUMA_NO_NODE) @@ -1933,14 +1925,15 @@ static int blk_mq_init_hctx(struct request_queue *q, goto sched_exit_hctx; if (set->ops->init_request && - set->ops->init_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx, node)) + set->ops->init_request(set, hctx->fq->flush_rq, hctx_idx, + node)) goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(&hctx->queue_rq_srcu); + blk_mq_debugfs_register_hctx(q, hctx); + return 0; free_fq: @@ -2378,6 +2371,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, { WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); + blk_mq_debugfs_unregister_hctxs(q); blk_mq_sysfs_unregister(q); /* @@ -2389,6 +2383,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, blk_mq_map_swqueue(q, online_mask); blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } /* @@ -2617,7 +2612,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return -EINVAL; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2643,12 +2637,12 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) q->nr_requests = nr; blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; } -void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, + int nr_hw_queues) { struct request_queue *q; @@ -2672,6 +2666,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); } + +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +{ + mutex_lock(&set->tag_list_lock); + __blk_mq_update_nr_hw_queues(set, nr_hw_queues); + mutex_unlock(&set->tag_list_lock); +} EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); /* Enable polling stats and return whether they were already enabled. 
*/ diff --git a/block/blk-mq.h b/block/blk-mq.h index 2814a14e529c..cc67b48e3551 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -83,34 +83,6 @@ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -/* - * debugfs helpers - */ -#ifdef CONFIG_BLK_DEBUG_FS -int blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_mq(struct request_queue *q); -void blk_mq_debugfs_unregister_mq(struct request_queue *q); -#else -static inline int blk_mq_debugfs_register(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - -static inline int blk_mq_debugfs_register_mq(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q) -{ -} -#endif - extern void blk_mq_rq_timed_out(struct request *req, bool reserved); void blk_mq_release(struct request_queue *q); diff --git a/block/blk-stat.c b/block/blk-stat.c index 6c2f40940439..c52356d90fe3 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq) rcu_read_lock(); list_for_each_entry_rcu(cb, &q->stats->callbacks, list) { - if (blk_stat_is_active(cb)) { - bucket = cb->bucket_fn(rq); - if (bucket < 0) - continue; - stat = &this_cpu_ptr(cb->cpu_stat)[bucket]; - __blk_stat_add(stat, value); - } + if (!blk_stat_is_active(cb)) + continue; + + bucket = cb->bucket_fn(rq); + if (bucket < 0) + continue; + + stat = &get_cpu_ptr(cb->cpu_stat)[bucket]; + __blk_stat_add(stat, value); + put_cpu_ptr(cb->cpu_stat); } rcu_read_unlock(); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3f37813ccbaf..283da7fbe034 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -13,6 +13,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-wbt.h" struct queue_sysfs_entry { @@ -808,7 +809,7 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); - blk_exit_rl(&q->root_rl); + blk_exit_rl(q, &q->root_rl); if (q->queue_tags) __blk_queue_free_tags(q); @@ -886,8 +887,10 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } - if (q->mq_ops) + if (q->mq_ops) { __blk_mq_register_dev(dev, q); + blk_mq_debugfs_register(q); + } kobject_uevent(&q->kobj, KOBJ_ADD); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b78db2e5fdff..fc13dd0c6e39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -22,11 +22,11 @@ static int throtl_quantum = 32; #define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_SSD (HZ / 50) #define MAX_THROTL_SLICE (HZ) -#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */ -#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */ #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ -/* default latency target is 0, eg, guarantee IO latency by default */ -#define DFL_LATENCY_TARGET (0) +#define MIN_THROTL_BPS (320 * 1024) +#define MIN_THROTL_IOPS (10) +#define DFL_LATENCY_TARGET (-1L) +#define DFL_IDLE_THRESHOLD (0) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -157,6 +157,7 @@ struct throtl_grp { unsigned long last_check_time; unsigned long latency_target; /* us */ + unsigned long latency_target_conf; /* us */ /* When did we start a new slice */ unsigned long slice_start[2]; unsigned long slice_end[2]; @@ -165,6 +166,7 @@ struct throtl_grp { unsigned long 
checked_last_finish_time; /* ns / 1024 */ unsigned long avg_idletime; /* ns / 1024 */ unsigned long idletime_threshold; /* us */ + unsigned long idletime_threshold_conf; /* us */ unsigned int bio_cnt; /* total bios */ unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ @@ -201,8 +203,6 @@ struct throtl_data unsigned int limit_index; bool limit_valid[LIMIT_CNT]; - unsigned long dft_idletime_threshold; /* us */ - unsigned long low_upgrade_time; unsigned long low_downgrade_time; @@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) td = tg->td; ret = tg->bps[rw][td->limit_index]; - if (ret == 0 && td->limit_index == LIMIT_LOW) - return tg->bps[rw][LIMIT_MAX]; + if (ret == 0 && td->limit_index == LIMIT_LOW) { + /* intermediate node or iops isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->iops[rw][td->limit_index]) + return U64_MAX; + else + return MIN_THROTL_BPS; + } if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] && tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) { @@ -315,10 +321,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return UINT_MAX; + td = tg->td; ret = tg->iops[rw][td->limit_index]; - if (ret == 0 && tg->td->limit_index == LIMIT_LOW) - return tg->iops[rw][LIMIT_MAX]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + /* intermediate node or bps isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->bps[rw][td->limit_index]) + return UINT_MAX; + else + return MIN_THROTL_IOPS; + } if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] && tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) { @@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) /* LIMIT_LOW will have default value 0 */ tg->latency_target = DFL_LATENCY_TARGET; + tg->latency_target_conf = DFL_LATENCY_TARGET; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; return &tg->pd; } @@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; - - tg->idletime_threshold = td->dft_idletime_threshold; } /* @@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static void tg_conf_updated(struct throtl_grp *tg) +static void tg_conf_updated(struct throtl_grp *tg, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; @@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg) * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ - blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) - tg_update_has_rules(blkg_to_tg(blkg)); + blkg_for_each_descendant_pre(blkg, pos_css, + global ? 
tg->td->queue->root_blkg : tg_to_blkg(tg)) { + struct throtl_grp *this_tg = blkg_to_tg(blkg); + struct throtl_grp *parent_tg; + + tg_update_has_rules(this_tg); + /* ignore root/second level */ + if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || + !blkg->parent->parent) + continue; + parent_tg = blkg_to_tg(blkg->parent); + /* + * make sure all children has lower idle time threshold and + * higher latency target + */ + this_tg->idletime_threshold = min(this_tg->idletime_threshold, + parent_tg->idletime_threshold); + this_tg->latency_target = max(this_tg->latency_target, + parent_tg->latency_target); + } /* * We're already holding queue_lock and know @tg is valid. Let's @@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; - tg_conf_updated(tg); + tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft && (off != LIMIT_LOW || - (tg->idletime_threshold == tg->td->dft_idletime_threshold && - tg->latency_target == DFL_LATENCY_TARGET))) + (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD && + tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; - if (tg->bps_conf[READ][off] != bps_dft) + if (tg->bps_conf[READ][off] != U64_MAX) snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps_conf[READ][off]); - if (tg->bps_conf[WRITE][off] != bps_dft) + if (tg->bps_conf[WRITE][off] != U64_MAX) snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps_conf[WRITE][off]); - if (tg->iops_conf[READ][off] != iops_dft) + if (tg->iops_conf[READ][off] != UINT_MAX) snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops_conf[READ][off]); - if (tg->iops_conf[WRITE][off] != iops_dft) + if (tg->iops_conf[WRITE][off] != UINT_MAX) snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops_conf[WRITE][off]); if (off == LIMIT_LOW) { - if (tg->idletime_threshold == ULONG_MAX) + if (tg->idletime_threshold_conf == ULONG_MAX) strcpy(idle_time, " idle=max"); else snprintf(idle_time, sizeof(idle_time), " idle=%lu", - tg->idletime_threshold); + tg->idletime_threshold_conf); - if (tg->latency_target == ULONG_MAX) + if (tg->latency_target_conf == ULONG_MAX) strcpy(latency_time, " latency=max"); else snprintf(latency_time, sizeof(latency_time), - " latency=%lu", tg->latency_target); + " latency=%lu", tg->latency_target_conf); } seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n", @@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, v[2] = tg->iops_conf[READ][index]; v[3] = tg->iops_conf[WRITE][index]; - idle_time = tg->idletime_threshold; - latency_time = tg->latency_target; + idle_time = tg->idletime_threshold_conf; + latency_time = tg->latency_target_conf; while (true) { char tok[27]; /* wiops=18446744073709551616 */ char *p; @@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->iops_conf[READ][LIMIT_MAX]); tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], tg->iops_conf[WRITE][LIMIT_MAX]); + tg->idletime_threshold_conf = idle_time; + tg->latency_target_conf = latency_time; + + /* force user to configure all settings for low limit */ + if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] || + tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) || + tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD || + tg->latency_target_conf == DFL_LATENCY_TARGET) { + 
tg->bps[READ][LIMIT_LOW] = 0; + tg->bps[WRITE][LIMIT_LOW] = 0; + tg->iops[READ][LIMIT_LOW] = 0; + tg->iops[WRITE][LIMIT_LOW] = 0; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->latency_target = DFL_LATENCY_TARGET; + } else if (index == LIMIT_LOW) { + tg->idletime_threshold = tg->idletime_threshold_conf; + tg->latency_target = tg->latency_target_conf; + } - if (index == LIMIT_LOW) { - blk_throtl_update_limit_valid(tg->td); - if (tg->td->limit_valid[LIMIT_LOW]) + blk_throtl_update_limit_valid(tg->td); + if (tg->td->limit_valid[LIMIT_LOW]) { + if (index == LIMIT_LOW) tg->td->limit_index = LIMIT_LOW; - tg->idletime_threshold = (idle_time == ULONG_MAX) ? - ULONG_MAX : idle_time; - tg->latency_target = (latency_time == ULONG_MAX) ? - ULONG_MAX : latency_time; - } - tg_conf_updated(tg); + } else + tg->td->limit_index = LIMIT_MAX; + tg_conf_updated(tg, index == LIMIT_LOW && + tg->td->limit_valid[LIMIT_LOW]); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) /* * cgroup is idle if: * - single idle is too long, longer than a fixed value (in case user - * configure a too big threshold) or 4 times of slice + * configure a too big threshold) or 4 times of idletime threshold * - average think time is more than threshold * - IO latency is largely below threshold */ - unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); - - time = min_t(unsigned long, MAX_IDLE_TIME, time); - return (ktime_get_ns() >> 10) - tg->last_finish_time > time || - tg->avg_idletime > tg->idletime_threshold || - (tg->latency_target && tg->bio_cnt && + unsigned long time; + bool ret; + + time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold); + ret = tg->latency_target == DFL_LATENCY_TARGET || + tg->idletime_threshold == DFL_IDLE_THRESHOLD || + (ktime_get_ns() >> 10) - tg->last_finish_time > time || + tg->avg_idletime > tg->idletime_threshold || + (tg->latency_target && tg->bio_cnt && tg->bad_bio_cnt * 5 < tg->bio_cnt); + throtl_log(&tg->service_queue, + "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d", + tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt, + tg->bio_cnt, ret, tg->td->scale); + return ret; } static bool throtl_tg_can_upgrade(struct throtl_grp *tg) @@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td) struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; + throtl_log(&td->service_queue, "upgrade to max"); td->limit_index = LIMIT_MAX; td->low_upgrade_time = jiffies; td->scale = 0; @@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new) { td->scale /= 2; + throtl_log(&td->service_queue, "downgrade, scale %d", td->scale); if (td->scale) { td->low_upgrade_time = jiffies - td->scale * td->throtl_slice; return; @@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->avg_buckets[i].valid = true; last_latency = td->avg_buckets[i].latency; } + + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) + throtl_log(&td->service_queue, + "Latency bucket %d: latency=%ld, valid=%d", i, + td->avg_buckets[i].latency, td->avg_buckets[i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) @@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q) { struct throtl_data *td; - struct cgroup_subsys_state *pos_css; - struct blkcg_gq *blkg; td = q->td; BUG_ON(!td); - if (blk_queue_nonrot(q)) { + if 
(blk_queue_nonrot(q)) td->throtl_slice = DFL_THROTL_SLICE_SSD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD; - } else { + else td->throtl_slice = DFL_THROTL_SLICE_HD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD; - } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ td->throtl_slice = DFL_THROTL_SLICE_HD; @@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q) td->track_bio_latency = !q->mq_ops && !q->request_fn; if (!td->track_bio_latency) blk_stat_enable_accounting(q); - - /* - * some tg are created before queue is fully initialized, eg, nonrot - * isn't initialized yet - */ - rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->idletime_threshold = td->dft_idletime_threshold; - } - rcu_read_unlock(); } #ifdef CONFIG_BLK_DEV_THROTTLING_LOW diff --git a/block/blk.h b/block/blk.h index 2ed70228e44f..83c8e1100525 100644 --- a/block/blk.h +++ b/block/blk.h @@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask); -void blk_exit_rl(struct request_list *rl); +void blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da69b079725f..b7e9c7feeab2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* - * offset from end of service tree + * offset from end of queue service tree for idle class */ #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under time slice mode */ +#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service under IOPS mode */ +#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate @@ -1362,6 +1366,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) cfqg->vfraction = max_t(unsigned, vfr, 1); } +static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) +{ + if (!iops_mode(cfqd)) + return CFQ_SLICE_MODE_GROUP_DELAY; + else + return CFQ_IOPS_MODE_GROUP_DELAY; +} + static void cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) { @@ -1381,7 +1393,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) n = rb_last(&st->rb); if (n) { __cfqg = rb_entry_cfqg(n); - cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; + cfqg->vdisktime = __cfqg->vdisktime + + cfq_get_cfqg_vdisktime_delay(cfqd); } else cfqg->vdisktime = st->min_vdisktime; cfq_group_service_tree_add(st, cfqg); diff --git a/block/elevator.c b/block/elevator.c index bf11e70f008b..dac99fbfc273 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -950,7 +950,6 @@ static int elevator_switch_mq(struct request_queue *q, int ret; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); if (q->elevator) { if (q->elevator->registered) @@ -978,9 +977,7 @@ static int elevator_switch_mq(struct request_queue *q, out: blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; - } /* @@ -1065,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name) strlcpy(elevator_name, name, sizeof(elevator_name)); e = 
elevator_get(strstrip(elevator_name), true); - if (!e) { - printk(KERN_ERR "elevator: type %s not found\n", elevator_name); + if (!e) return -EINVAL; - } if (q->elevator && !strcmp(elevator_name, q->elevator->type->elevator_name)) { @@ -1088,19 +1083,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } -int elevator_change(struct request_queue *q, const char *name) -{ - int ret; - - /* Protect q->elevator from elevator_init() */ - mutex_lock(&q->sysfs_lock); - ret = __elevator_change(q, name); - mutex_unlock(&q->sysfs_lock); - - return ret; -} -EXPORT_SYMBOL(elevator_change); - static inline bool elv_support_iosched(struct request_queue *q) { if (q->mq_ops && q->tag_set && (q->tag_set->flags & @@ -1121,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!ret) return count; - printk(KERN_ERR "elevator: switch to %s failed\n", name); return ret; } diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 3b0090bc5dd1..b9faabc75fdb 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -26,6 +26,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-stat.h" @@ -683,6 +684,131 @@ static struct elv_fs_entry kyber_sched_attrs[] = { }; #undef KYBER_LAT_ATTR +#ifdef CONFIG_BLK_DEBUG_FS +#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \ +static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct kyber_queue_data *kqd = q->elevator->elevator_data; \ + \ + sbitmap_queue_show(&kqd->domain_tokens[domain], m); \ + return 0; \ +} \ + \ +static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos) \ + __acquires(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_lock(&khd->lock); \ + return seq_list_start(&khd->rqs[domain], *pos); \ +} \ + \ +static void *kyber_##name##_rqs_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + return seq_list_next(v, &khd->rqs[domain], pos); \ +} \ + \ +static void kyber_##name##_rqs_stop(struct seq_file *m, void *v) \ + __releases(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_unlock(&khd->lock); \ +} \ + \ +static const struct seq_operations kyber_##name##_rqs_seq_ops = { \ + .start = kyber_##name##_rqs_start, \ + .next = kyber_##name##_rqs_next, \ + .stop = kyber_##name##_rqs_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ +{ \ + struct blk_mq_hw_ctx *hctx = data; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + wait_queue_t *wait = &khd->domain_wait[domain]; \ + \ + seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \ + return 0; \ +} +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) +#undef KYBER_DEBUGFS_DOMAIN_ATTRS + +static int kyber_async_depth_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct kyber_queue_data *kqd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", kqd->async_depth); + return 0; +} + +static int kyber_cur_domain_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct 
kyber_hctx_data *khd = hctx->sched_data; + + switch (khd->cur_domain) { + case KYBER_READ: + seq_puts(m, "READ\n"); + break; + case KYBER_SYNC_WRITE: + seq_puts(m, "SYNC_WRITE\n"); + break; + case KYBER_OTHER: + seq_puts(m, "OTHER\n"); + break; + default: + seq_printf(m, "%u\n", khd->cur_domain); + break; + } + return 0; +} + +static int kyber_batching_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct kyber_hctx_data *khd = hctx->sched_data; + + seq_printf(m, "%u\n", khd->batching); + return 0; +} + +#define KYBER_QUEUE_DOMAIN_ATTRS(name) \ + {#name "_tokens", 0400, kyber_##name##_tokens_show} +static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { + KYBER_QUEUE_DOMAIN_ATTRS(read), + KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(other), + {"async_depth", 0400, kyber_async_depth_show}, + {}, +}; +#undef KYBER_QUEUE_DOMAIN_ATTRS + +#define KYBER_HCTX_DOMAIN_ATTRS(name) \ + {#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \ + {#name "_waiting", 0400, kyber_##name##_waiting_show} +static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { + KYBER_HCTX_DOMAIN_ATTRS(read), + KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(other), + {"cur_domain", 0400, kyber_cur_domain_show}, + {"batching", 0400, kyber_batching_show}, + {}, +}; +#undef KYBER_HCTX_DOMAIN_ATTRS +#endif + static struct elevator_type kyber_sched = { .ops.mq = { .init_sched = kyber_init_sched, @@ -696,6 +822,10 @@ static struct elevator_type kyber_sched = { .has_work = kyber_has_work, }, .uses_mq = true, +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = kyber_queue_debugfs_attrs, + .hctx_debugfs_attrs = kyber_hctx_debugfs_attrs, +#endif .elevator_attrs = kyber_sched_attrs, .elevator_name = "kyber", .elevator_owner = THIS_MODULE, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 236121633ca0..1b964a387afe 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -19,6 +19,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-mq-sched.h" @@ -517,6 +518,125 @@ static struct elv_fs_entry deadline_attrs[] = { __ATTR_NULL }; +#ifdef CONFIG_BLK_DEBUG_FS +#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name) \ +static void *deadline_##name##_fifo_start(struct seq_file *m, \ + loff_t *pos) \ + __acquires(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_lock(&dd->lock); \ + return seq_list_start(&dd->fifo_list[ddir], *pos); \ +} \ + \ +static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + return seq_list_next(v, &dd->fifo_list[ddir], pos); \ +} \ + \ +static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ + __releases(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_unlock(&dd->lock); \ +} \ + \ +static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ + .start = deadline_##name##_fifo_start, \ + .next = deadline_##name##_fifo_next, \ + .stop = deadline_##name##_fifo_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int deadline_##name##_next_rq_show(void *data, \ + struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct request *rq = 
dd->next_rq[ddir]; \ + \ + if (rq) \ + __blk_mq_debugfs_rq_show(m, rq); \ + return 0; \ +} +DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read) +DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write) +#undef DEADLINE_DEBUGFS_DDIR_ATTRS + +static int deadline_batching_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->batching); + return 0; +} + +static int deadline_starved_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->starved); + return 0; +} + +static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos) + __acquires(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_lock(&dd->lock); + return seq_list_start(&dd->dispatch, *pos); +} + +static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + return seq_list_next(v, &dd->dispatch, pos); +} + +static void deadline_dispatch_stop(struct seq_file *m, void *v) + __releases(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_unlock(&dd->lock); +} + +static const struct seq_operations deadline_dispatch_seq_ops = { + .start = deadline_dispatch_start, + .next = deadline_dispatch_next, + .stop = deadline_dispatch_stop, + .show = blk_mq_debugfs_rq_show, +}; + +#define DEADLINE_QUEUE_DDIR_ATTRS(name) \ + {#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops}, \ + {#name "_next_rq", 0400, deadline_##name##_next_rq_show} +static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { + DEADLINE_QUEUE_DDIR_ATTRS(read), + DEADLINE_QUEUE_DDIR_ATTRS(write), + {"batching", 0400, deadline_batching_show}, + {"starved", 0400, deadline_starved_show}, + {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops}, + {}, +}; +#undef DEADLINE_QUEUE_DDIR_ATTRS +#endif + static struct elevator_type mq_deadline = { .ops.mq = { .insert_requests = dd_insert_requests, @@ -533,6 +653,9 @@ static struct elevator_type mq_deadline = { }, .uses_mq = true, +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = deadline_queue_debugfs_attrs, +#endif .elevator_attrs = deadline_attrs, .elevator_name = "mq-deadline", .elevator_owner = THIS_MODULE, diff --git a/block/partition-generic.c b/block/partition-generic.c index 0171a2faad68..c5ec8246e25e 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -16,7 +16,6 @@ #include <linux/kmod.h> #include <linux/ctype.h> #include <linux/genhd.h> -#include <linux/dax.h> #include <linux/blktrace_api.h> #include "partitions/check.h" @@ -321,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } @@ -630,24 +631,12 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) return 0; } -static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) -{ - struct address_space *mapping = bdev->bd_inode->i_mapping; - - return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), - NULL); -} - unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) { + struct 
address_space *mapping = bdev->bd_inode->i_mapping; struct page *page; - /* don't populate page cache for dax capable devices */ - if (IS_DAX(bdev->bd_inode)) - page = read_dax_sector(bdev, n); - else - page = read_pagecache_sector(bdev, n); - + page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); if (!IS_ERR(page)) { if (PageError(page)) goto fail; diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 93e7c1b32edd..5610cd537da7 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state, continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); + if (memcmp(flavour, "bsd\0", 4) == 0) + bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; |
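Note on the blk_stat_add() hunk above: swapping this_cpu_ptr() for the get_cpu_ptr()/put_cpu_ptr() pair makes the per-CPU bucket update safe against preemption, since the plain accessor gives no guarantee that the task stays on the same CPU between fetching the pointer and adding the sample. A minimal, self-contained sketch of that accessor pattern follows; the demo_bytes counter and demo_account() helper are hypothetical and only illustrate the API, they are not part of the patch.

#include <linux/types.h>
#include <linux/percpu.h>

/* hypothetical per-CPU counter, used only to show the accessor pattern */
static DEFINE_PER_CPU(u64, demo_bytes);

static void demo_account(u64 value)
{
	u64 *ctr;

	/*
	 * get_cpu_ptr() disables preemption and returns this CPU's slot, so
	 * the task cannot migrate between taking the pointer and updating
	 * the value; put_cpu_ptr() re-enables preemption afterwards.
	 */
	ctr = get_cpu_ptr(&demo_bytes);
	*ctr += value;
	put_cpu_ptr(&demo_bytes);
}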
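On the tg_bps_limit()/tg_iops_limit() hunks: while the low limit is in effect, a zero setting no longer silently falls back to the max limit. An intermediate cgroup, or a leaf that has the other resource's low limit configured, is treated as unlimited (U64_MAX / UINT_MAX); otherwise the group gets the new floors MIN_THROTL_BPS (320 * 1024 bytes/s, i.e. 320 KiB/s) and MIN_THROTL_IOPS (10 IOs/s). The reworked tg_conf_updated() walk also keeps the hierarchy consistent by giving every child at most its parent's idle-time threshold and at least its parent's latency target (both in microseconds): for example, with a parent configured as idle=1000 latency=50 and a child configured as idle=5000 latency=10, the child's effective values become idletime_threshold=1000 and latency_target=50.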
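The rewritten throtl_tg_is_idle() ties the single-idle window to the group's own threshold instead of the queue's throttle slice. With idletime_threshold = 1000 us, for instance, time = min(MAX_IDLE_TIME, 4 * 1000) = 4000 us, so the group counts as idle if it has not completed an I/O for more than 4 ms, if its average think time exceeds 1 ms, or, when a latency target is set, if fewer than one in five of its bios exceeded that target; a group still at the defaults (latency_target == DFL_LATENCY_TARGET or idletime_threshold == DFL_IDLE_THRESHOLD) is always treated as idle. The added throtl_log() call exposes exactly these inputs, which is what the avg_idle, idle_threshold, bad_bio and total_bio fields in the log format refer to.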
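The kyber and mq-deadline additions both hang scheduler-private debugfs files off the queue_debugfs_attrs/hctx_debugfs_attrs pointers in struct elevator_type, guarded by CONFIG_BLK_DEBUG_FS. Below is a minimal sketch of what one such queue-level attribute looks like; struct demo_queue_data, demo_depth_show() and the "depth" file are hypothetical stand-ins, while the initializer shapes ({name, mode, show} plus the empty terminating entry) are the ones the kyber and mq-deadline tables above rely on. It assumes it is built inside block/ so the local blk-mq-debugfs.h header is available.

#include <linux/seq_file.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include "blk-mq-debugfs.h"

/* hypothetical per-queue scheduler data, for illustration only */
struct demo_queue_data {
	unsigned int depth;
};

/* show callback: 'data' is the request_queue for queue-level attributes */
static int demo_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct demo_queue_data *dqd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dqd->depth);
	return 0;
}

static const struct blk_mq_debugfs_attr demo_queue_debugfs_attrs[] = {
	{"depth", 0400, demo_depth_show},
	{},	/* terminating sentinel, as in the kyber/mq-deadline tables */
};

A scheduler would then point .queue_debugfs_attrs at this table in its struct elevator_type, exactly as the kyber and mq-deadline hunks do, and the blk-mq debugfs code creates the files for whichever scheduler is attached.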