Diffstat (limited to 'block')
-rw-r--r--   block/bfq-iosched.c        |   5
-rw-r--r--   block/bfq-wf2q.c           |  13
-rw-r--r--   block/blk-cgroup.c         |   2
-rw-r--r--   block/blk-core.c           |  42
-rw-r--r--   block/blk-mq-debugfs.c     | 870
-rw-r--r--   block/blk-mq-debugfs.h     |  82
-rw-r--r--   block/blk-mq-sched.c       |  30
-rw-r--r--   block/blk-mq-sysfs.c       |  10
-rw-r--r--   block/blk-mq.c             |  83
-rw-r--r--   block/blk-mq.h             |  28
-rw-r--r--   block/blk-stat.c           |  17
-rw-r--r--   block/blk-sysfs.c          |   7
-rw-r--r--   block/blk-throttle.c       | 172
-rw-r--r--   block/blk.h                |   2
-rw-r--r--   block/cfq-iosched.c        |  17
-rw-r--r--   block/elevator.c           |  21
-rw-r--r--   block/kyber-iosched.c      | 130
-rw-r--r--   block/mq-deadline.c        | 123
-rw-r--r--   block/partition-generic.c  |  21
-rw-r--r--   block/partitions/msdos.c   |   2
20 files changed, 972 insertions, 705 deletions
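
The centerpiece of this series is the rework of blk-mq debugfs support: instead of one file_operations instance per attribute, every debugfs file is now described by a struct blk_mq_debugfs_attr (declared in the new block/blk-mq-debugfs.h) and served by a single blk_mq_debugfs_fops. A minimal sketch of the pattern, lifted from the hunks below (kernel-internal code, shown here only to illustrate the shape, not runnable on its own):

    /* New descriptor from block/blk-mq-debugfs.h: one entry per debugfs file. */
    struct blk_mq_debugfs_attr {
    	const char *name;
    	umode_t mode;
    	int (*show)(void *, struct seq_file *);
    	ssize_t (*write)(void *, const char __user *, size_t, loff_t *);
    	/* Set either .show or .seq_ops. */
    	const struct seq_operations *seq_ops;
    };

    /* Attributes become sentinel-terminated tables, all sharing one fops. */
    static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
    	{"poll_stat", 0400, queue_poll_stat_show},
    	{"state", 0600, queue_state_show, queue_state_write},
    	{},
    };

debugfs_create_files() registers each table entry with blk_mq_debugfs_fops, whose open/write handlers recover the attribute from inode->i_private and the owning object (queue, hctx or ctx) from the parent directory's i_private, as shown in the blk-mq-debugfs.c hunks that follow.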
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index bd8499ef157c..08ce45096350 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -56,6 +56,11 @@ * rotational or flash-based devices, and to get the job done quickly * for applications consisting in many I/O-bound processes. * + * NOTE: if the main or only goal, with a given device, is to achieve + * the maximum-possible throughput at all times, then do switch off + * all low-latency heuristics for that device, by setting low_latency + * to 0. + * * BFQ is described in [1], where also a reference to the initial, more * theoretical paper on BFQ can be found. The interested reader can find * in the latter paper full details on the main algorithm, as well as diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index b4fc3e4260b7..8726ede19eef 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1114,12 +1114,21 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) { struct bfq_sched_data *sd = entity->sched_data; - struct bfq_service_tree *st = bfq_entity_service_tree(entity); - int is_in_service = entity == sd->in_service_entity; + struct bfq_service_tree *st; + bool is_in_service; if (!entity->on_st) /* entity never activated, or already inactive */ return false; + /* + * If we get here, then entity is active, which implies that + * bfq_group_set_parent has already been invoked for the group + * represented by entity. Therefore, the field + * entity->sched_data has been set, and we can safely use it. + */ + st = bfq_entity_service_tree(entity); + is_in_service = entity == sd->in_service_entity; + if (is_in_service) bfq_calc_finish(entity, entity->service); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7c2947128f58..0480892e97e5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->blkcg != &blkcg_root) - blk_exit_rl(&blkg->rl); + blk_exit_rl(blkg->q, &blkg->rl); blkg_rwstat_exit(&blkg->stat_ios); blkg_rwstat_exit(&blkg->stat_bytes); diff --git a/block/blk-core.c b/block/blk-core.c index 24886b69690f..a7421b772d0e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -561,13 +561,9 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. 
*/ blk_freeze_queue(q); - if (!q->mq_ops) { - spin_lock_irq(lock); + spin_lock_irq(lock); + if (!q->mq_ops) __blk_drain_queue(q, true); - } else { - blk_mq_debugfs_unregister_mq(q); - spin_lock_irq(lock); - } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -652,13 +648,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, if (!rl->rq_pool) return -ENOMEM; + if (rl != &q->root_rl) + WARN_ON_ONCE(!blk_get_queue(q)); + return 0; } -void blk_exit_rl(struct request_list *rl) +void blk_exit_rl(struct request_queue *q, struct request_list *rl) { - if (rl->rq_pool) + if (rl->rq_pool) { mempool_destroy(rl->rq_pool); + if (rl != &q->root_rl) + blk_put_queue(q); + } } struct request_queue *blk_alloc_queue(gfp_t gfp_mask) @@ -2648,8 +2650,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) return false; } - WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD); - req->__data_len -= total_bytes; /* update sector only for requests with clear definition of sector */ @@ -2662,17 +2662,19 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; } - /* - * If total number of sectors is less than the first segment - * size, something has gone terribly wrong. - */ - if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { - blk_dump_rq_flags(req, "request botched"); - req->__data_len = blk_rq_cur_bytes(req); - } + if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { + /* + * If total number of sectors is less than the first segment + * size, something has gone terribly wrong. + */ + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { + blk_dump_rq_flags(req, "request botched"); + req->__data_len = blk_rq_cur_bytes(req); + } - /* recalculate the number of segments */ - blk_recalc_rq_segments(req); + /* recalculate the number of segments */ + blk_recalc_rq_segments(req); + } return true; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index bcd2a7d4a3a5..803aed4d7221 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -21,28 +21,9 @@ #include <linux/blk-mq.h> #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" -struct blk_mq_debugfs_attr { - const char *name; - umode_t mode; - const struct file_operations *fops; -}; - -static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, - const struct seq_operations *ops) -{ - struct seq_file *m; - int ret; - - ret = seq_open(file, ops); - if (!ret) { - m = file->private_data; - m->private = inode->i_private; - } - return ret; -} - static int blk_flags_show(struct seq_file *m, const unsigned long flags, const char *const *flag_name, int flag_name_count) { @@ -53,7 +34,7 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, if (!(flags & BIT(i))) continue; if (sep) - seq_puts(m, " "); + seq_puts(m, "|"); sep = true; if (i < flag_name_count && flag_name[i]) seq_puts(m, flag_name[i]); @@ -63,41 +44,43 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags, return 0; } +#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name static const char *const blk_queue_flag_name[] = { - [QUEUE_FLAG_QUEUED] = "QUEUED", - [QUEUE_FLAG_STOPPED] = "STOPPED", - [QUEUE_FLAG_SYNCFULL] = "SYNCFULL", - [QUEUE_FLAG_ASYNCFULL] = "ASYNCFULL", - [QUEUE_FLAG_DYING] = "DYING", - [QUEUE_FLAG_BYPASS] = "BYPASS", - [QUEUE_FLAG_BIDI] = "BIDI", - [QUEUE_FLAG_NOMERGES] = "NOMERGES", - [QUEUE_FLAG_SAME_COMP] = "SAME_COMP", - [QUEUE_FLAG_FAIL_IO] = "FAIL_IO", - 
[QUEUE_FLAG_STACKABLE] = "STACKABLE", - [QUEUE_FLAG_NONROT] = "NONROT", - [QUEUE_FLAG_IO_STAT] = "IO_STAT", - [QUEUE_FLAG_DISCARD] = "DISCARD", - [QUEUE_FLAG_NOXMERGES] = "NOXMERGES", - [QUEUE_FLAG_ADD_RANDOM] = "ADD_RANDOM", - [QUEUE_FLAG_SECERASE] = "SECERASE", - [QUEUE_FLAG_SAME_FORCE] = "SAME_FORCE", - [QUEUE_FLAG_DEAD] = "DEAD", - [QUEUE_FLAG_INIT_DONE] = "INIT_DONE", - [QUEUE_FLAG_NO_SG_MERGE] = "NO_SG_MERGE", - [QUEUE_FLAG_POLL] = "POLL", - [QUEUE_FLAG_WC] = "WC", - [QUEUE_FLAG_FUA] = "FUA", - [QUEUE_FLAG_FLUSH_NQ] = "FLUSH_NQ", - [QUEUE_FLAG_DAX] = "DAX", - [QUEUE_FLAG_STATS] = "STATS", - [QUEUE_FLAG_POLL_STATS] = "POLL_STATS", - [QUEUE_FLAG_REGISTERED] = "REGISTERED", -}; - -static int blk_queue_flags_show(struct seq_file *m, void *v) -{ - struct request_queue *q = m->private; + QUEUE_FLAG_NAME(QUEUED), + QUEUE_FLAG_NAME(STOPPED), + QUEUE_FLAG_NAME(SYNCFULL), + QUEUE_FLAG_NAME(ASYNCFULL), + QUEUE_FLAG_NAME(DYING), + QUEUE_FLAG_NAME(BYPASS), + QUEUE_FLAG_NAME(BIDI), + QUEUE_FLAG_NAME(NOMERGES), + QUEUE_FLAG_NAME(SAME_COMP), + QUEUE_FLAG_NAME(FAIL_IO), + QUEUE_FLAG_NAME(STACKABLE), + QUEUE_FLAG_NAME(NONROT), + QUEUE_FLAG_NAME(IO_STAT), + QUEUE_FLAG_NAME(DISCARD), + QUEUE_FLAG_NAME(NOXMERGES), + QUEUE_FLAG_NAME(ADD_RANDOM), + QUEUE_FLAG_NAME(SECERASE), + QUEUE_FLAG_NAME(SAME_FORCE), + QUEUE_FLAG_NAME(DEAD), + QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(NO_SG_MERGE), + QUEUE_FLAG_NAME(POLL), + QUEUE_FLAG_NAME(WC), + QUEUE_FLAG_NAME(FUA), + QUEUE_FLAG_NAME(FLUSH_NQ), + QUEUE_FLAG_NAME(DAX), + QUEUE_FLAG_NAME(STATS), + QUEUE_FLAG_NAME(POLL_STATS), + QUEUE_FLAG_NAME(REGISTERED), +}; +#undef QUEUE_FLAG_NAME + +static int queue_state_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; blk_flags_show(m, q->queue_flags, blk_queue_flag_name, ARRAY_SIZE(blk_queue_flag_name)); @@ -105,42 +88,41 @@ static int blk_queue_flags_show(struct seq_file *m, void *v) return 0; } -static ssize_t blk_queue_flags_store(struct file *file, const char __user *ubuf, - size_t len, loff_t *offp) +static ssize_t queue_state_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - struct request_queue *q = file_inode(file)->i_private; - char op[16] = { }, *s; + struct request_queue *q = data; + char opbuf[16] = { }, *op; + + /* + * The "state" attribute is removed after blk_cleanup_queue() has called + * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid + * triggering a use-after-free. + */ + if (blk_queue_dead(q)) + return -ENOENT; - len = min(len, sizeof(op) - 1); - if (copy_from_user(op, ubuf, len)) + if (count >= sizeof(opbuf)) { + pr_err("%s: operation too long\n", __func__); + goto inval; + } + + if (copy_from_user(opbuf, buf, count)) return -EFAULT; - s = op; - strsep(&s, " \t\n"); /* strip trailing whitespace */ + op = strstrip(opbuf); if (strcmp(op, "run") == 0) { blk_mq_run_hw_queues(q, true); } else if (strcmp(op, "start") == 0) { blk_mq_start_stopped_hw_queues(q, true); } else { - pr_err("%s: unsupported operation %s. 
Use either 'run' or 'start'\n", - __func__, op); + pr_err("%s: unsupported operation '%s'\n", __func__, op); +inval: + pr_err("%s: use either 'run' or 'start'\n", __func__); return -EINVAL; } - return len; -} - -static int blk_queue_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, blk_queue_flags_show, inode->i_private); + return count; } -static const struct file_operations blk_queue_flags_fops = { - .open = blk_queue_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = blk_queue_flags_store, -}; - static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) { if (stat->nr_samples) { @@ -151,9 +133,9 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) } } -static int queue_poll_stat_show(struct seq_file *m, void *v) +static int queue_poll_stat_show(void *data, struct seq_file *m) { - struct request_queue *q = m->private; + struct request_queue *q = data; int bucket; for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) { @@ -168,28 +150,19 @@ static int queue_poll_stat_show(struct seq_file *m, void *v) return 0; } -static int queue_poll_stat_open(struct inode *inode, struct file *file) -{ - return single_open(file, queue_poll_stat_show, inode->i_private); -} - -static const struct file_operations queue_poll_stat_fops = { - .open = queue_poll_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name static const char *const hctx_state_name[] = { - [BLK_MQ_S_STOPPED] = "STOPPED", - [BLK_MQ_S_TAG_ACTIVE] = "TAG_ACTIVE", - [BLK_MQ_S_SCHED_RESTART] = "SCHED_RESTART", - [BLK_MQ_S_TAG_WAITING] = "TAG_WAITING", - + HCTX_STATE_NAME(STOPPED), + HCTX_STATE_NAME(TAG_ACTIVE), + HCTX_STATE_NAME(SCHED_RESTART), + HCTX_STATE_NAME(TAG_WAITING), + HCTX_STATE_NAME(START_ON_RUN), }; -static int hctx_state_show(struct seq_file *m, void *v) +#undef HCTX_STATE_NAME + +static int hctx_state_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; blk_flags_show(m, hctx->state, hctx_state_name, ARRAY_SIZE(hctx_state_name)); @@ -197,34 +170,26 @@ static int hctx_state_show(struct seq_file *m, void *v) return 0; } -static int hctx_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_state_show, inode->i_private); -} - -static const struct file_operations hctx_state_fops = { - .open = hctx_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name static const char *const alloc_policy_name[] = { - [BLK_TAG_ALLOC_FIFO] = "fifo", - [BLK_TAG_ALLOC_RR] = "rr", + BLK_TAG_ALLOC_NAME(FIFO), + BLK_TAG_ALLOC_NAME(RR), }; +#undef BLK_TAG_ALLOC_NAME +#define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { - [ilog2(BLK_MQ_F_SHOULD_MERGE)] = "SHOULD_MERGE", - [ilog2(BLK_MQ_F_TAG_SHARED)] = "TAG_SHARED", - [ilog2(BLK_MQ_F_SG_MERGE)] = "SG_MERGE", - [ilog2(BLK_MQ_F_BLOCKING)] = "BLOCKING", - [ilog2(BLK_MQ_F_NO_SCHED)] = "NO_SCHED", + HCTX_FLAG_NAME(SHOULD_MERGE), + HCTX_FLAG_NAME(TAG_SHARED), + HCTX_FLAG_NAME(SG_MERGE), + HCTX_FLAG_NAME(BLOCKING), + HCTX_FLAG_NAME(NO_SCHED), }; +#undef HCTX_FLAG_NAME -static int hctx_flags_show(struct seq_file *m, void *v) +static int hctx_flags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; 
const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags); seq_puts(m, "alloc_policy="); @@ -241,76 +206,69 @@ static int hctx_flags_show(struct seq_file *m, void *v) return 0; } -static int hctx_flags_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_flags_show, inode->i_private); -} - -static const struct file_operations hctx_flags_fops = { - .open = hctx_flags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name static const char *const op_name[] = { - [REQ_OP_READ] = "READ", - [REQ_OP_WRITE] = "WRITE", - [REQ_OP_FLUSH] = "FLUSH", - [REQ_OP_DISCARD] = "DISCARD", - [REQ_OP_ZONE_REPORT] = "ZONE_REPORT", - [REQ_OP_SECURE_ERASE] = "SECURE_ERASE", - [REQ_OP_ZONE_RESET] = "ZONE_RESET", - [REQ_OP_WRITE_SAME] = "WRITE_SAME", - [REQ_OP_WRITE_ZEROES] = "WRITE_ZEROES", - [REQ_OP_SCSI_IN] = "SCSI_IN", - [REQ_OP_SCSI_OUT] = "SCSI_OUT", - [REQ_OP_DRV_IN] = "DRV_IN", - [REQ_OP_DRV_OUT] = "DRV_OUT", -}; - + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(ZONE_REPORT), + REQ_OP_NAME(SECURE_ERASE), + REQ_OP_NAME(ZONE_RESET), + REQ_OP_NAME(WRITE_SAME), + REQ_OP_NAME(WRITE_ZEROES), + REQ_OP_NAME(SCSI_IN), + REQ_OP_NAME(SCSI_OUT), + REQ_OP_NAME(DRV_IN), + REQ_OP_NAME(DRV_OUT), +}; +#undef REQ_OP_NAME + +#define CMD_FLAG_NAME(name) [__REQ_##name] = #name static const char *const cmd_flag_name[] = { - [__REQ_FAILFAST_DEV] = "FAILFAST_DEV", - [__REQ_FAILFAST_TRANSPORT] = "FAILFAST_TRANSPORT", - [__REQ_FAILFAST_DRIVER] = "FAILFAST_DRIVER", - [__REQ_SYNC] = "SYNC", - [__REQ_META] = "META", - [__REQ_PRIO] = "PRIO", - [__REQ_NOMERGE] = "NOMERGE", - [__REQ_IDLE] = "IDLE", - [__REQ_INTEGRITY] = "INTEGRITY", - [__REQ_FUA] = "FUA", - [__REQ_PREFLUSH] = "PREFLUSH", - [__REQ_RAHEAD] = "RAHEAD", - [__REQ_BACKGROUND] = "BACKGROUND", - [__REQ_NR_BITS] = "NR_BITS", -}; - + CMD_FLAG_NAME(FAILFAST_DEV), + CMD_FLAG_NAME(FAILFAST_TRANSPORT), + CMD_FLAG_NAME(FAILFAST_DRIVER), + CMD_FLAG_NAME(SYNC), + CMD_FLAG_NAME(META), + CMD_FLAG_NAME(PRIO), + CMD_FLAG_NAME(NOMERGE), + CMD_FLAG_NAME(IDLE), + CMD_FLAG_NAME(INTEGRITY), + CMD_FLAG_NAME(FUA), + CMD_FLAG_NAME(PREFLUSH), + CMD_FLAG_NAME(RAHEAD), + CMD_FLAG_NAME(BACKGROUND), + CMD_FLAG_NAME(NOUNMAP), +}; +#undef CMD_FLAG_NAME + +#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { - [ilog2((__force u32)RQF_SORTED)] = "SORTED", - [ilog2((__force u32)RQF_STARTED)] = "STARTED", - [ilog2((__force u32)RQF_QUEUED)] = "QUEUED", - [ilog2((__force u32)RQF_SOFTBARRIER)] = "SOFTBARRIER", - [ilog2((__force u32)RQF_FLUSH_SEQ)] = "FLUSH_SEQ", - [ilog2((__force u32)RQF_MIXED_MERGE)] = "MIXED_MERGE", - [ilog2((__force u32)RQF_MQ_INFLIGHT)] = "MQ_INFLIGHT", - [ilog2((__force u32)RQF_DONTPREP)] = "DONTPREP", - [ilog2((__force u32)RQF_PREEMPT)] = "PREEMPT", - [ilog2((__force u32)RQF_COPY_USER)] = "COPY_USER", - [ilog2((__force u32)RQF_FAILED)] = "FAILED", - [ilog2((__force u32)RQF_QUIET)] = "QUIET", - [ilog2((__force u32)RQF_ELVPRIV)] = "ELVPRIV", - [ilog2((__force u32)RQF_IO_STAT)] = "IO_STAT", - [ilog2((__force u32)RQF_ALLOCED)] = "ALLOCED", - [ilog2((__force u32)RQF_PM)] = "PM", - [ilog2((__force u32)RQF_HASHED)] = "HASHED", - [ilog2((__force u32)RQF_STATS)] = "STATS", - [ilog2((__force u32)RQF_SPECIAL_PAYLOAD)] = "SPECIAL_PAYLOAD", -}; - -static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) + RQF_NAME(SORTED), + RQF_NAME(STARTED), + RQF_NAME(QUEUED), + 
RQF_NAME(SOFTBARRIER), + RQF_NAME(FLUSH_SEQ), + RQF_NAME(MIXED_MERGE), + RQF_NAME(MQ_INFLIGHT), + RQF_NAME(DONTPREP), + RQF_NAME(PREEMPT), + RQF_NAME(COPY_USER), + RQF_NAME(FAILED), + RQF_NAME(QUIET), + RQF_NAME(ELVPRIV), + RQF_NAME(IO_STAT), + RQF_NAME(ALLOCED), + RQF_NAME(PM), + RQF_NAME(HASHED), + RQF_NAME(STATS), + RQF_NAME(SPECIAL_PAYLOAD), +}; +#undef RQF_NAME + +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { - struct request *rq = list_entry_rq(v); const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; const unsigned int op = rq->cmd_flags & REQ_OP_MASK; @@ -332,6 +290,13 @@ static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) seq_puts(m, "}\n"); return 0; } +EXPORT_SYMBOL_GPL(__blk_mq_debugfs_rq_show); + +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) +{ + return __blk_mq_debugfs_rq_show(m, list_entry_rq(v)); +} +EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) __acquires(&hctx->lock) @@ -364,38 +329,14 @@ static const struct seq_operations hctx_dispatch_seq_ops = { .show = blk_mq_debugfs_rq_show, }; -static int hctx_dispatch_open(struct inode *inode, struct file *file) -{ - return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops); -} - -static const struct file_operations hctx_dispatch_fops = { - .open = hctx_dispatch_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int hctx_ctx_map_show(struct seq_file *m, void *v) +static int hctx_ctx_map_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; sbitmap_bitmap_show(&hctx->ctx_map, m); return 0; } -static int hctx_ctx_map_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_ctx_map_show, inode->i_private); -} - -static const struct file_operations hctx_ctx_map_fops = { - .open = hctx_ctx_map_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void blk_mq_debugfs_tags_show(struct seq_file *m, struct blk_mq_tags *tags) { @@ -413,9 +354,9 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m, } } -static int hctx_tags_show(struct seq_file *m, void *v) +static int hctx_tags_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -430,21 +371,9 @@ out: return res; } -static int hctx_tags_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_tags_show, inode->i_private); -} - -static const struct file_operations hctx_tags_fops = { - .open = hctx_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_tags_bitmap_show(struct seq_file *m, void *v) +static int hctx_tags_bitmap_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -459,21 +388,9 @@ out: return res; } -static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_show(void *data, struct seq_file *m) { - return single_open(file, hctx_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_tags_bitmap_fops = { - .open = hctx_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + 
struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -488,21 +405,9 @@ out: return res; } -static int hctx_sched_tags_open(struct inode *inode, struct file *file) +static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m) { - return single_open(file, hctx_sched_tags_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_fops = { - .open = hctx_sched_tags_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) -{ - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; struct request_queue *q = hctx->queue; int res; @@ -517,21 +422,9 @@ out: return res; } -static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private); -} - -static const struct file_operations hctx_sched_tags_bitmap_fops = { - .open = hctx_sched_tags_bitmap_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_io_poll_show(struct seq_file *m, void *v) +static int hctx_io_poll_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "considered=%lu\n", hctx->poll_considered); seq_printf(m, "invoked=%lu\n", hctx->poll_invoked); @@ -539,32 +432,18 @@ static int hctx_io_poll_show(struct seq_file *m, void *v) return 0; } -static int hctx_io_poll_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_io_poll_show, inode->i_private); -} - -static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf, +static ssize_t hctx_io_poll_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; return count; } -static const struct file_operations hctx_io_poll_fops = { - .open = hctx_io_poll_open, - .read = seq_read, - .write = hctx_io_poll_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_dispatched_show(struct seq_file *m, void *v) +static int hctx_dispatched_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]); @@ -579,16 +458,10 @@ static int hctx_dispatched_show(struct seq_file *m, void *v) return 0; } -static int hctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_dispatched_show, inode->i_private); -} - -static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t hctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; int i; for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) @@ -596,96 +469,48 @@ static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations hctx_dispatched_fops = { - .open = hctx_dispatched_open, - .read = seq_read, - .write = hctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_queued_show(struct seq_file *m, void *v) +static int hctx_queued_show(void *data, struct seq_file 
*m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%lu\n", hctx->queued); return 0; } -static int hctx_queued_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_queued_show, inode->i_private); -} - -static ssize_t hctx_queued_write(struct file *file, const char __user *buf, +static ssize_t hctx_queued_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->queued = 0; return count; } -static const struct file_operations hctx_queued_fops = { - .open = hctx_queued_open, - .read = seq_read, - .write = hctx_queued_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_run_show(struct seq_file *m, void *v) +static int hctx_run_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%lu\n", hctx->run); return 0; } -static int hctx_run_open(struct inode *inode, struct file *file) +static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count, + loff_t *ppos) { - return single_open(file, hctx_run_show, inode->i_private); -} - -static ssize_t hctx_run_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; hctx->run = 0; return count; } -static const struct file_operations hctx_run_fops = { - .open = hctx_run_open, - .read = seq_read, - .write = hctx_run_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int hctx_active_show(struct seq_file *m, void *v) +static int hctx_active_show(void *data, struct seq_file *m) { - struct blk_mq_hw_ctx *hctx = m->private; + struct blk_mq_hw_ctx *hctx = data; seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); return 0; } -static int hctx_active_open(struct inode *inode, struct file *file) -{ - return single_open(file, hctx_active_show, inode->i_private); -} - -static const struct file_operations hctx_active_fops = { - .open = hctx_active_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) __acquires(&ctx->lock) { @@ -716,156 +541,192 @@ static const struct seq_operations ctx_rq_list_seq_ops = { .stop = ctx_rq_list_stop, .show = blk_mq_debugfs_rq_show, }; - -static int ctx_rq_list_open(struct inode *inode, struct file *file) +static int ctx_dispatched_show(void *data, struct seq_file *m) { - return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops); -} - -static const struct file_operations ctx_rq_list_fops = { - .open = ctx_rq_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int ctx_dispatched_show(struct seq_file *m, void *v) -{ - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); return 0; } -static int ctx_dispatched_open(struct inode *inode, struct file *file) -{ - return single_open(file, ctx_dispatched_show, inode->i_private); -} - -static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf, +static ssize_t ctx_dispatched_write(void *data, const char __user *buf, size_t count, loff_t *ppos) { - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx 
*ctx = data; ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; return count; } -static const struct file_operations ctx_dispatched_fops = { - .open = ctx_dispatched_open, - .read = seq_read, - .write = ctx_dispatched_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_merged_show(struct seq_file *m, void *v) +static int ctx_merged_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu\n", ctx->rq_merged); return 0; } -static int ctx_merged_open(struct inode *inode, struct file *file) +static ssize_t ctx_merged_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - return single_open(file, ctx_merged_show, inode->i_private); -} - -static ssize_t ctx_merged_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; ctx->rq_merged = 0; return count; } -static const struct file_operations ctx_merged_fops = { - .open = ctx_merged_open, - .read = seq_read, - .write = ctx_merged_write, - .llseek = seq_lseek, - .release = single_release, -}; - -static int ctx_completed_show(struct seq_file *m, void *v) +static int ctx_completed_show(void *data, struct seq_file *m) { - struct blk_mq_ctx *ctx = m->private; + struct blk_mq_ctx *ctx = data; seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); return 0; } -static int ctx_completed_open(struct inode *inode, struct file *file) +static ssize_t ctx_completed_write(void *data, const char __user *buf, + size_t count, loff_t *ppos) { - return single_open(file, ctx_completed_show, inode->i_private); + struct blk_mq_ctx *ctx = data; + + ctx->rq_completed[0] = ctx->rq_completed[1] = 0; + return count; } -static ssize_t ctx_completed_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static int blk_mq_debugfs_show(struct seq_file *m, void *v) +{ + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(m->file->f_path.dentry->d_parent)->i_private; + + return attr->show(data, m); +} + +static ssize_t blk_mq_debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct seq_file *m = file->private_data; - struct blk_mq_ctx *ctx = m->private; + const struct blk_mq_debugfs_attr *attr = m->private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; - ctx->rq_completed[0] = ctx->rq_completed[1] = 0; - return count; + if (!attr->write) + return -EPERM; + + return attr->write(data, buf, count, ppos); +} + +static int blk_mq_debugfs_open(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + void *data = d_inode(file->f_path.dentry->d_parent)->i_private; + struct seq_file *m; + int ret; + + if (attr->seq_ops) { + ret = seq_open(file, attr->seq_ops); + if (!ret) { + m = file->private_data; + m->private = data; + } + return ret; + } + + if (WARN_ON_ONCE(!attr->show)) + return -EPERM; + + return single_open(file, blk_mq_debugfs_show, inode->i_private); +} + +static int blk_mq_debugfs_release(struct inode *inode, struct file *file) +{ + const struct blk_mq_debugfs_attr *attr = inode->i_private; + + if (attr->show) + return single_release(inode, file); + else + return seq_release(inode, file); } -static const struct file_operations ctx_completed_fops = { - .open = ctx_completed_open, +const struct file_operations blk_mq_debugfs_fops = { + .open = 
blk_mq_debugfs_open, .read = seq_read, - .write = ctx_completed_write, + .write = blk_mq_debugfs_write, .llseek = seq_lseek, - .release = single_release, + .release = blk_mq_debugfs_release, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { - {"poll_stat", 0400, &queue_poll_stat_fops}, - {"state", 0600, &blk_queue_flags_fops}, + {"poll_stat", 0400, queue_poll_stat_show}, + {"state", 0600, queue_state_show, queue_state_write}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { - {"state", 0400, &hctx_state_fops}, - {"flags", 0400, &hctx_flags_fops}, - {"dispatch", 0400, &hctx_dispatch_fops}, - {"ctx_map", 0400, &hctx_ctx_map_fops}, - {"tags", 0400, &hctx_tags_fops}, - {"tags_bitmap", 0400, &hctx_tags_bitmap_fops}, - {"sched_tags", 0400, &hctx_sched_tags_fops}, - {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops}, - {"io_poll", 0600, &hctx_io_poll_fops}, - {"dispatched", 0600, &hctx_dispatched_fops}, - {"queued", 0600, &hctx_queued_fops}, - {"run", 0600, &hctx_run_fops}, - {"active", 0400, &hctx_active_fops}, + {"state", 0400, hctx_state_show}, + {"flags", 0400, hctx_flags_show}, + {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, + {"ctx_map", 0400, hctx_ctx_map_show}, + {"tags", 0400, hctx_tags_show}, + {"tags_bitmap", 0400, hctx_tags_bitmap_show}, + {"sched_tags", 0400, hctx_sched_tags_show}, + {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, + {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, + {"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write}, + {"queued", 0600, hctx_queued_show, hctx_queued_write}, + {"run", 0600, hctx_run_show, hctx_run_write}, + {"active", 0400, hctx_active_show}, {}, }; static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { - {"rq_list", 0400, &ctx_rq_list_fops}, - {"dispatched", 0600, &ctx_dispatched_fops}, - {"merged", 0600, &ctx_merged_fops}, - {"completed", 0600, &ctx_completed_fops}, + {"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops}, + {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write}, + {"merged", 0600, ctx_merged_show, ctx_merged_write}, + {"completed", 0600, ctx_completed_show, ctx_completed_write}, {}, }; +static bool debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr) +{ + d_inode(parent)->i_private = data; + + for (; attr->name; attr++) { + if (!debugfs_create_file(attr->name, attr->mode, parent, + (void *)attr, &blk_mq_debugfs_fops)) + return false; + } + return true; +} + int blk_mq_debugfs_register(struct request_queue *q) { + struct blk_mq_hw_ctx *hctx; + int i; + if (!blk_debugfs_root) return -ENOENT; q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); if (!q->debugfs_dir) - goto err; + return -ENOMEM; - if (blk_mq_debugfs_register_mq(q)) + if (!debugfs_create_files(q->debugfs_dir, q, + blk_mq_debugfs_queue_attrs)) goto err; + /* + * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir + * didn't exist yet (because we don't know what to name the directory + * until the queue is registered to a gendisk). 
+ */ + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) + goto err; + if (q->elevator && !hctx->sched_debugfs_dir && + blk_mq_debugfs_register_sched_hctx(q, hctx)) + goto err; + } + return 0; err: @@ -876,30 +737,18 @@ err: void blk_mq_debugfs_unregister(struct request_queue *q) { debugfs_remove_recursive(q->debugfs_dir); - q->mq_debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; q->debugfs_dir = NULL; } -static bool debugfs_create_files(struct dentry *parent, void *data, - const struct blk_mq_debugfs_attr *attr) -{ - for (; attr->name; attr++) { - if (!debugfs_create_file(attr->name, attr->mode, parent, - data, attr->fops)) - return false; - } - return true; -} - -static int blk_mq_debugfs_register_ctx(struct request_queue *q, - struct blk_mq_ctx *ctx, - struct dentry *hctx_dir) +static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) { struct dentry *ctx_dir; char name[20]; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); - ctx_dir = debugfs_create_dir(name, hctx_dir); + ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); if (!ctx_dir) return -ENOMEM; @@ -909,59 +758,122 @@ static int blk_mq_debugfs_register_ctx(struct request_queue *q, return 0; } -static int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; - struct dentry *hctx_dir; char name[20]; int i; - snprintf(name, sizeof(name), "%u", hctx->queue_num); - hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); - if (!hctx_dir) - return -ENOMEM; + if (!q->debugfs_dir) + return -ENOENT; - if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs)) + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); + hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); + if (!hctx->debugfs_dir) return -ENOMEM; + if (!debugfs_create_files(hctx->debugfs_dir, hctx, + blk_mq_debugfs_hctx_attrs)) + goto err; + hctx_for_each_ctx(hctx, ctx, i) { - if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) + if (blk_mq_debugfs_register_ctx(hctx, ctx)) + goto err; + } + + return 0; + +err: + blk_mq_debugfs_unregister_hctx(hctx); + return -ENOMEM; +} + +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) +{ + debugfs_remove_recursive(hctx->debugfs_dir); + hctx->sched_debugfs_dir = NULL; + hctx->debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (blk_mq_debugfs_register_hctx(q, hctx)) return -ENOMEM; } return 0; } -int blk_mq_debugfs_register_mq(struct request_queue *q) +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_debugfs_unregister_hctx(hctx); +} + +int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + struct elevator_type *e = q->elevator->type; + if (!q->debugfs_dir) return -ENOENT; - q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir); - if (!q->mq_debugfs_dir) - goto err; + if (!e->queue_debugfs_attrs) + return 0; - if (!debugfs_create_files(q->mq_debugfs_dir, q, blk_mq_debugfs_queue_attrs)) - goto err; + q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); + if (!q->sched_debugfs_dir) + return -ENOMEM; - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_debugfs_register_hctx(q, hctx)) - goto err; - } + if 
(!debugfs_create_files(q->sched_debugfs_dir, q, + e->queue_debugfs_attrs)) + goto err; return 0; err: - blk_mq_debugfs_unregister_mq(q); + blk_mq_debugfs_unregister_sched(q); return -ENOMEM; } -void blk_mq_debugfs_unregister_mq(struct request_queue *q) +void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ + debugfs_remove_recursive(q->sched_debugfs_dir); + q->sched_debugfs_dir = NULL; +} + +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + struct elevator_type *e = q->elevator->type; + + if (!hctx->debugfs_dir) + return -ENOENT; + + if (!e->hctx_debugfs_attrs) + return 0; + + hctx->sched_debugfs_dir = debugfs_create_dir("sched", + hctx->debugfs_dir); + if (!hctx->sched_debugfs_dir) + return -ENOMEM; + + if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx, + e->hctx_debugfs_attrs)) + return -ENOMEM; + + return 0; +} + +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { - debugfs_remove_recursive(q->mq_debugfs_dir); - q->mq_debugfs_dir = NULL; + debugfs_remove_recursive(hctx->sched_debugfs_dir); + hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h new file mode 100644 index 000000000000..a182e6f97565 --- /dev/null +++ b/block/blk-mq-debugfs.h @@ -0,0 +1,82 @@ +#ifndef INT_BLK_MQ_DEBUGFS_H +#define INT_BLK_MQ_DEBUGFS_H + +#ifdef CONFIG_BLK_DEBUG_FS + +#include <linux/seq_file.h> + +struct blk_mq_debugfs_attr { + const char *name; + umode_t mode; + int (*show)(void *, struct seq_file *); + ssize_t (*write)(void *, const char __user *, size_t, loff_t *); + /* Set either .show or .seq_ops. */ + const struct seq_operations *seq_ops; +}; + +int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); +int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); + +int blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); +int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); +int blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); + +int blk_mq_debugfs_register_sched(struct request_queue *q); +void blk_mq_debugfs_unregister_sched(struct request_queue *q); +int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); +#else +static inline int blk_mq_debugfs_register(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) +{ +} + +static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_sched(struct request_queue *q) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q) +{ +} + +static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) +{ + return 0; +} + +static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) +{ +} +#endif + +#endif diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 
8b361e192e8a..1f5b692526ae 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -11,6 +11,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-wbt.h" @@ -82,11 +83,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); - /* - * For a reserved tag, allocate a normal request since we might - * have driver dependencies on the value of the internal tag. - */ - if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) { + if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; /* @@ -476,6 +473,8 @@ int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, } } + blk_mq_debugfs_register_sched_hctx(q, hctx); + return 0; } @@ -487,6 +486,8 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, if (!e) return; + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { e->type->ops.mq.exit_hctx(hctx, hctx_idx); hctx->sched_data = NULL; @@ -523,8 +524,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err; - if (e->ops.mq.init_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_register_sched(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (e->ops.mq.init_hctx) { ret = e->ops.mq.init_hctx(hctx, i); if (ret) { eq = q->elevator; @@ -533,6 +536,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + blk_mq_debugfs_register_sched_hctx(q, hctx); } return 0; @@ -548,14 +552,14 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) struct blk_mq_hw_ctx *hctx; unsigned int i; - if (e->type->ops.mq.exit_hctx) { - queue_for_each_hw_ctx(q, hctx, i) { - if (hctx->sched_data) { - e->type->ops.mq.exit_hctx(hctx, i); - hctx->sched_data = NULL; - } + queue_for_each_hw_ctx(q, hctx, i) { + blk_mq_debugfs_unregister_sched_hctx(hctx); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { + e->type->ops.mq.exit_hctx(hctx, i); + hctx->sched_data = NULL; } } + blk_mq_debugfs_unregister_sched(q); if (e->type->ops.mq.exit_sched) e->type->ops.mq.exit_sched(e); blk_mq_sched_tags_teardown(q); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ec0afdf765e3..79969c3c234f 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -258,8 +258,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -318,8 +316,6 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_ADD); - blk_mq_debugfs_register(q); - queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) @@ -335,8 +331,6 @@ unreg: while (--i >= 0) blk_mq_unregister_hctx(q->queue_hw_ctx[i]); - blk_mq_debugfs_unregister_mq(q); - kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); kobject_put(&dev->kobj); @@ -364,8 +358,6 @@ void blk_mq_sysfs_unregister(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_unregister_mq(q); - queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); @@ -382,8 +374,6 @@ int blk_mq_sysfs_register(struct request_queue *q) if (!q->mq_sysfs_init_done) goto unlock; - blk_mq_debugfs_register_mq(q); - queue_for_each_hw_ctx(q, 
hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq.c b/block/blk-mq.c index bf90684a007a..1bcccedcc74f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,6 +31,7 @@ #include <linux/blk-mq.h> #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-stat.h" #include "blk-wbt.h" @@ -41,6 +42,7 @@ static LIST_HEAD(all_q_list); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static int blk_mq_poll_stats_bkt(const struct request *rq) { @@ -166,7 +168,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) unsigned int i; bool rcu = false; - blk_mq_stop_hw_queues(q); + __blk_mq_stop_hw_queues(q, true); queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -626,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -void blk_mq_abort_requeue_list(struct request_queue *q) -{ - unsigned long flags; - LIST_HEAD(rq_list); - - spin_lock_irqsave(&q->requeue_lock, flags); - list_splice_init(&q->requeue_list, &rq_list); - spin_unlock_irqrestore(&q->requeue_lock, flags); - - while (!list_empty(&rq_list)) { - struct request *rq; - - rq = list_first_entry(&rq_list, struct request, queuelist); - list_del_init(&rq->queuelist); - blk_mq_end_request(rq, -EIO); - } -} -EXPORT_SYMBOL(blk_mq_abort_requeue_list); - struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) { if (tag < tags->nr_tags) { @@ -1218,20 +1201,34 @@ bool blk_mq_queue_stopped(struct request_queue *q) } EXPORT_SYMBOL(blk_mq_queue_stopped); -void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) { - cancel_delayed_work_sync(&hctx->run_work); + if (sync) + cancel_delayed_work_sync(&hctx->run_work); + else + cancel_delayed_work(&hctx->run_work); + set_bit(BLK_MQ_S_STOPPED, &hctx->state); } + +void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) +{ + __blk_mq_stop_hw_queue(hctx, false); +} EXPORT_SYMBOL(blk_mq_stop_hw_queue); -void blk_mq_stop_hw_queues(struct request_queue *q) +static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) { struct blk_mq_hw_ctx *hctx; int i; queue_for_each_hw_ctx(q, hctx, i) - blk_mq_stop_hw_queue(hctx); + __blk_mq_stop_hw_queue(hctx, sync); +} + +void blk_mq_stop_hw_queues(struct request_queue *q) +{ + __blk_mq_stop_hw_queues(q, false); } EXPORT_SYMBOL(blk_mq_stop_hw_queues); @@ -1538,13 +1535,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + blk_queue_split(q, &bio, q->bio_split); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); return BLK_QC_T_NONE; } - blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; @@ -1655,8 +1652,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (!rq) continue; - set->ops->exit_request(set->driver_data, rq, - hctx_idx, i); + set->ops->exit_request(set, rq, hctx_idx); tags->static_rqs[i] = NULL; } } @@ -1787,8 +1783,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, tags->static_rqs[i] = rq; if (set->ops->init_request) { - if (set->ops->init_request(set->driver_data, - rq, hctx_idx, i, + if 
(set->ops->init_request(set, rq, hctx_idx, node)) { tags->static_rqs[i] = NULL; goto fail; @@ -1849,14 +1844,12 @@ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { - unsigned flush_start_tag = set->queue_depth; + blk_mq_debugfs_unregister_hctx(hctx); blk_mq_tag_idle(hctx); if (set->ops->exit_request) - set->ops->exit_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx); + set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); blk_mq_sched_exit_hctx(q, hctx, hctx_idx); @@ -1889,7 +1882,6 @@ static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) { int node; - unsigned flush_start_tag = set->queue_depth; node = hctx->numa_node; if (node == NUMA_NO_NODE) @@ -1933,14 +1925,15 @@ static int blk_mq_init_hctx(struct request_queue *q, goto sched_exit_hctx; if (set->ops->init_request && - set->ops->init_request(set->driver_data, - hctx->fq->flush_rq, hctx_idx, - flush_start_tag + hctx_idx, node)) + set->ops->init_request(set, hctx->fq->flush_rq, hctx_idx, + node)) goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) init_srcu_struct(&hctx->queue_rq_srcu); + blk_mq_debugfs_register_hctx(q, hctx); + return 0; free_fq: @@ -2378,6 +2371,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, { WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); + blk_mq_debugfs_unregister_hctxs(q); blk_mq_sysfs_unregister(q); /* @@ -2389,6 +2383,7 @@ static void blk_mq_queue_reinit(struct request_queue *q, blk_mq_map_swqueue(q, online_mask); blk_mq_sysfs_register(q); + blk_mq_debugfs_register_hctxs(q); } /* @@ -2617,7 +2612,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return -EINVAL; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2643,12 +2637,12 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) q->nr_requests = nr; blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; } -void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, + int nr_hw_queues) { struct request_queue *q; @@ -2672,6 +2666,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); } + +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +{ + mutex_lock(&set->tag_list_lock); + __blk_mq_update_nr_hw_queues(set, nr_hw_queues); + mutex_unlock(&set->tag_list_lock); +} EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); /* Enable polling stats and return whether they were already enabled. 
*/ diff --git a/block/blk-mq.h b/block/blk-mq.h index 2814a14e529c..cc67b48e3551 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -83,34 +83,6 @@ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -/* - * debugfs helpers - */ -#ifdef CONFIG_BLK_DEBUG_FS -int blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_mq(struct request_queue *q); -void blk_mq_debugfs_unregister_mq(struct request_queue *q); -#else -static inline int blk_mq_debugfs_register(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - -static inline int blk_mq_debugfs_register_mq(struct request_queue *q) -{ - return 0; -} - -static inline void blk_mq_debugfs_unregister_mq(struct request_queue *q) -{ -} -#endif - extern void blk_mq_rq_timed_out(struct request *req, bool reserved); void blk_mq_release(struct request_queue *q); diff --git a/block/blk-stat.c b/block/blk-stat.c index 6c2f40940439..c52356d90fe3 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -96,13 +96,16 @@ void blk_stat_add(struct request *rq) rcu_read_lock(); list_for_each_entry_rcu(cb, &q->stats->callbacks, list) { - if (blk_stat_is_active(cb)) { - bucket = cb->bucket_fn(rq); - if (bucket < 0) - continue; - stat = &this_cpu_ptr(cb->cpu_stat)[bucket]; - __blk_stat_add(stat, value); - } + if (!blk_stat_is_active(cb)) + continue; + + bucket = cb->bucket_fn(rq); + if (bucket < 0) + continue; + + stat = &get_cpu_ptr(cb->cpu_stat)[bucket]; + __blk_stat_add(stat, value); + put_cpu_ptr(cb->cpu_stat); } rcu_read_unlock(); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3f37813ccbaf..283da7fbe034 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -13,6 +13,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-wbt.h" struct queue_sysfs_entry { @@ -808,7 +809,7 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); - blk_exit_rl(&q->root_rl); + blk_exit_rl(q, &q->root_rl); if (q->queue_tags) __blk_queue_free_tags(q); @@ -886,8 +887,10 @@ int blk_register_queue(struct gendisk *disk) goto unlock; } - if (q->mq_ops) + if (q->mq_ops) { __blk_mq_register_dev(dev, q); + blk_mq_debugfs_register(q); + } kobject_uevent(&q->kobj, KOBJ_ADD); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b78db2e5fdff..fc13dd0c6e39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -22,11 +22,11 @@ static int throtl_quantum = 32; #define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_SSD (HZ / 50) #define MAX_THROTL_SLICE (HZ) -#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */ -#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */ #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ -/* default latency target is 0, eg, guarantee IO latency by default */ -#define DFL_LATENCY_TARGET (0) +#define MIN_THROTL_BPS (320 * 1024) +#define MIN_THROTL_IOPS (10) +#define DFL_LATENCY_TARGET (-1L) +#define DFL_IDLE_THRESHOLD (0) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) @@ -157,6 +157,7 @@ struct throtl_grp { unsigned long last_check_time; unsigned long latency_target; /* us */ + unsigned long latency_target_conf; /* us */ /* When did we start a new slice */ unsigned long slice_start[2]; unsigned long slice_end[2]; @@ -165,6 +166,7 @@ struct throtl_grp { unsigned long 
checked_last_finish_time; /* ns / 1024 */ unsigned long avg_idletime; /* ns / 1024 */ unsigned long idletime_threshold; /* us */ + unsigned long idletime_threshold_conf; /* us */ unsigned int bio_cnt; /* total bios */ unsigned int bad_bio_cnt; /* bios exceeding latency threshold */ @@ -201,8 +203,6 @@ struct throtl_data unsigned int limit_index; bool limit_valid[LIMIT_CNT]; - unsigned long dft_idletime_threshold; /* us */ - unsigned long low_upgrade_time; unsigned long low_downgrade_time; @@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw) td = tg->td; ret = tg->bps[rw][td->limit_index]; - if (ret == 0 && td->limit_index == LIMIT_LOW) - return tg->bps[rw][LIMIT_MAX]; + if (ret == 0 && td->limit_index == LIMIT_LOW) { + /* intermediate node or iops isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->iops[rw][td->limit_index]) + return U64_MAX; + else + return MIN_THROTL_BPS; + } if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] && tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) { @@ -315,10 +321,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent) return UINT_MAX; + td = tg->td; ret = tg->iops[rw][td->limit_index]; - if (ret == 0 && tg->td->limit_index == LIMIT_LOW) - return tg->iops[rw][LIMIT_MAX]; + if (ret == 0 && tg->td->limit_index == LIMIT_LOW) { + /* intermediate node or bps isn't 0 */ + if (!list_empty(&blkg->blkcg->css.children) || + tg->bps[rw][td->limit_index]) + return UINT_MAX; + else + return MIN_THROTL_IOPS; + } if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] && tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) { @@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) /* LIMIT_LOW will have default value 0 */ tg->latency_target = DFL_LATENCY_TARGET; + tg->latency_target_conf = DFL_LATENCY_TARGET; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD; return &tg->pd; } @@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; - - tg->idletime_threshold = td->dft_idletime_threshold; } /* @@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v) return 0; } -static void tg_conf_updated(struct throtl_grp *tg) +static void tg_conf_updated(struct throtl_grp *tg, bool global) { struct throtl_service_queue *sq = &tg->service_queue; struct cgroup_subsys_state *pos_css; @@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg) * restrictions in the whole hierarchy and allows them to bypass * blk-throttle. */ - blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg)) - tg_update_has_rules(blkg_to_tg(blkg)); + blkg_for_each_descendant_pre(blkg, pos_css, + global ? 
tg->td->queue->root_blkg : tg_to_blkg(tg)) { + struct throtl_grp *this_tg = blkg_to_tg(blkg); + struct throtl_grp *parent_tg; + + tg_update_has_rules(this_tg); + /* ignore root/second level */ + if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent || + !blkg->parent->parent) + continue; + parent_tg = blkg_to_tg(blkg->parent); + /* + * make sure all children has lower idle time threshold and + * higher latency target + */ + this_tg->idletime_threshold = min(this_tg->idletime_threshold, + parent_tg->idletime_threshold); + this_tg->latency_target = max(this_tg->latency_target, + parent_tg->latency_target); + } /* * We're already holding queue_lock and know @tg is valid. Let's @@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, else *(unsigned int *)((void *)tg + of_cft(of)->private) = v; - tg_conf_updated(tg); + tg_conf_updated(tg, false); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft && (off != LIMIT_LOW || - (tg->idletime_threshold == tg->td->dft_idletime_threshold && - tg->latency_target == DFL_LATENCY_TARGET))) + (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD && + tg->latency_target_conf == DFL_LATENCY_TARGET))) return 0; - if (tg->bps_conf[READ][off] != bps_dft) + if (tg->bps_conf[READ][off] != U64_MAX) snprintf(bufs[0], sizeof(bufs[0]), "%llu", tg->bps_conf[READ][off]); - if (tg->bps_conf[WRITE][off] != bps_dft) + if (tg->bps_conf[WRITE][off] != U64_MAX) snprintf(bufs[1], sizeof(bufs[1]), "%llu", tg->bps_conf[WRITE][off]); - if (tg->iops_conf[READ][off] != iops_dft) + if (tg->iops_conf[READ][off] != UINT_MAX) snprintf(bufs[2], sizeof(bufs[2]), "%u", tg->iops_conf[READ][off]); - if (tg->iops_conf[WRITE][off] != iops_dft) + if (tg->iops_conf[WRITE][off] != UINT_MAX) snprintf(bufs[3], sizeof(bufs[3]), "%u", tg->iops_conf[WRITE][off]); if (off == LIMIT_LOW) { - if (tg->idletime_threshold == ULONG_MAX) + if (tg->idletime_threshold_conf == ULONG_MAX) strcpy(idle_time, " idle=max"); else snprintf(idle_time, sizeof(idle_time), " idle=%lu", - tg->idletime_threshold); + tg->idletime_threshold_conf); - if (tg->latency_target == ULONG_MAX) + if (tg->latency_target_conf == ULONG_MAX) strcpy(latency_time, " latency=max"); else snprintf(latency_time, sizeof(latency_time), - " latency=%lu", tg->latency_target); + " latency=%lu", tg->latency_target_conf); } seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n", @@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, v[2] = tg->iops_conf[READ][index]; v[3] = tg->iops_conf[WRITE][index]; - idle_time = tg->idletime_threshold; - latency_time = tg->latency_target; + idle_time = tg->idletime_threshold_conf; + latency_time = tg->latency_target_conf; while (true) { char tok[27]; /* wiops=18446744073709551616 */ char *p; @@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg->iops_conf[READ][LIMIT_MAX]); tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], tg->iops_conf[WRITE][LIMIT_MAX]); + tg->idletime_threshold_conf = idle_time; + tg->latency_target_conf = latency_time; + + /* force user to configure all settings for low limit */ + if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] || + tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) || + tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD || + tg->latency_target_conf == DFL_LATENCY_TARGET) { + 
tg->bps[READ][LIMIT_LOW] = 0; + tg->bps[WRITE][LIMIT_LOW] = 0; + tg->iops[READ][LIMIT_LOW] = 0; + tg->iops[WRITE][LIMIT_LOW] = 0; + tg->idletime_threshold = DFL_IDLE_THRESHOLD; + tg->latency_target = DFL_LATENCY_TARGET; + } else if (index == LIMIT_LOW) { + tg->idletime_threshold = tg->idletime_threshold_conf; + tg->latency_target = tg->latency_target_conf; + } - if (index == LIMIT_LOW) { - blk_throtl_update_limit_valid(tg->td); - if (tg->td->limit_valid[LIMIT_LOW]) + blk_throtl_update_limit_valid(tg->td); + if (tg->td->limit_valid[LIMIT_LOW]) { + if (index == LIMIT_LOW) tg->td->limit_index = LIMIT_LOW; - tg->idletime_threshold = (idle_time == ULONG_MAX) ? - ULONG_MAX : idle_time; - tg->latency_target = (latency_time == ULONG_MAX) ? - ULONG_MAX : latency_time; - } - tg_conf_updated(tg); + } else + tg->td->limit_index = LIMIT_MAX; + tg_conf_updated(tg, index == LIMIT_LOW && + tg->td->limit_valid[LIMIT_LOW]); ret = 0; out_finish: blkg_conf_finish(&ctx); @@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) /* * cgroup is idle if: * - single idle is too long, longer than a fixed value (in case user - * configure a too big threshold) or 4 times of slice + * configure a too big threshold) or 4 times of idletime threshold * - average think time is more than threshold * - IO latency is largely below threshold */ - unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); - - time = min_t(unsigned long, MAX_IDLE_TIME, time); - return (ktime_get_ns() >> 10) - tg->last_finish_time > time || - tg->avg_idletime > tg->idletime_threshold || - (tg->latency_target && tg->bio_cnt && + unsigned long time; + bool ret; + + time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold); + ret = tg->latency_target == DFL_LATENCY_TARGET || + tg->idletime_threshold == DFL_IDLE_THRESHOLD || + (ktime_get_ns() >> 10) - tg->last_finish_time > time || + tg->avg_idletime > tg->idletime_threshold || + (tg->latency_target && tg->bio_cnt && tg->bad_bio_cnt * 5 < tg->bio_cnt); + throtl_log(&tg->service_queue, + "avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d", + tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt, + tg->bio_cnt, ret, tg->td->scale); + return ret; } static bool throtl_tg_can_upgrade(struct throtl_grp *tg) @@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td) struct cgroup_subsys_state *pos_css; struct blkcg_gq *blkg; + throtl_log(&td->service_queue, "upgrade to max"); td->limit_index = LIMIT_MAX; td->low_upgrade_time = jiffies; td->scale = 0; @@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new) { td->scale /= 2; + throtl_log(&td->service_queue, "downgrade, scale %d", td->scale); if (td->scale) { td->low_upgrade_time = jiffies - td->scale * td->throtl_slice; return; @@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->avg_buckets[i].valid = true; last_latency = td->avg_buckets[i].latency; } + + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) + throtl_log(&td->service_queue, + "Latency bucket %d: latency=%ld, valid=%d", i, + td->avg_buckets[i].latency, td->avg_buckets[i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) @@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q) { struct throtl_data *td; - struct cgroup_subsys_state *pos_css; - struct blkcg_gq *blkg; td = q->td; BUG_ON(!td); - if (blk_queue_nonrot(q)) { + if 
(blk_queue_nonrot(q)) td->throtl_slice = DFL_THROTL_SLICE_SSD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD; - } else { + else td->throtl_slice = DFL_THROTL_SLICE_HD; - td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD; - } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ td->throtl_slice = DFL_THROTL_SLICE_HD; @@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q) td->track_bio_latency = !q->mq_ops && !q->request_fn; if (!td->track_bio_latency) blk_stat_enable_accounting(q); - - /* - * some tg are created before queue is fully initialized, eg, nonrot - * isn't initialized yet - */ - rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->idletime_threshold = td->dft_idletime_threshold; - } - rcu_read_unlock(); } #ifdef CONFIG_BLK_DEV_THROTTLING_LOW diff --git a/block/blk.h b/block/blk.h index 2ed70228e44f..83c8e1100525 100644 --- a/block/blk.h +++ b/block/blk.h @@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask); -void blk_exit_rl(struct request_list *rl); +void blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da69b079725f..b7e9c7feeab2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* - * offset from end of service tree + * offset from end of queue service tree for idle class */ #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under time slice mode */ +#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service under IOPS mode */ +#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate @@ -1362,6 +1366,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) cfqg->vfraction = max_t(unsigned, vfr, 1); } +static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) +{ + if (!iops_mode(cfqd)) + return CFQ_SLICE_MODE_GROUP_DELAY; + else + return CFQ_IOPS_MODE_GROUP_DELAY; +} + static void cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) { @@ -1381,7 +1393,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) n = rb_last(&st->rb); if (n) { __cfqg = rb_entry_cfqg(n); - cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; + cfqg->vdisktime = __cfqg->vdisktime + + cfq_get_cfqg_vdisktime_delay(cfqd); } else cfqg->vdisktime = st->min_vdisktime; cfq_group_service_tree_add(st, cfqg); diff --git a/block/elevator.c b/block/elevator.c index bf11e70f008b..dac99fbfc273 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -950,7 +950,6 @@ static int elevator_switch_mq(struct request_queue *q, int ret; blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); if (q->elevator) { if (q->elevator->registered) @@ -978,9 +977,7 @@ static int elevator_switch_mq(struct request_queue *q, out: blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); return ret; - } /* @@ -1065,10 +1062,8 @@ static int __elevator_change(struct request_queue *q, const char *name) strlcpy(elevator_name, name, sizeof(elevator_name)); e = 
elevator_get(strstrip(elevator_name), true); - if (!e) { - printk(KERN_ERR "elevator: type %s not found\n", elevator_name); + if (!e) return -EINVAL; - } if (q->elevator && !strcmp(elevator_name, q->elevator->type->elevator_name)) { @@ -1088,19 +1083,6 @@ static int __elevator_change(struct request_queue *q, const char *name) return elevator_switch(q, e); } -int elevator_change(struct request_queue *q, const char *name) -{ - int ret; - - /* Protect q->elevator from elevator_init() */ - mutex_lock(&q->sysfs_lock); - ret = __elevator_change(q, name); - mutex_unlock(&q->sysfs_lock); - - return ret; -} -EXPORT_SYMBOL(elevator_change); - static inline bool elv_support_iosched(struct request_queue *q) { if (q->mq_ops && q->tag_set && (q->tag_set->flags & @@ -1121,7 +1103,6 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!ret) return count; - printk(KERN_ERR "elevator: switch to %s failed\n", name); return ret; } diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 3b0090bc5dd1..b9faabc75fdb 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -26,6 +26,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" #include "blk-stat.h" @@ -683,6 +684,131 @@ static struct elv_fs_entry kyber_sched_attrs[] = { }; #undef KYBER_LAT_ATTR +#ifdef CONFIG_BLK_DEBUG_FS +#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \ +static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct kyber_queue_data *kqd = q->elevator->elevator_data; \ + \ + sbitmap_queue_show(&kqd->domain_tokens[domain], m); \ + return 0; \ +} \ + \ +static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos) \ + __acquires(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_lock(&khd->lock); \ + return seq_list_start(&khd->rqs[domain], *pos); \ +} \ + \ +static void *kyber_##name##_rqs_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + return seq_list_next(v, &khd->rqs[domain], pos); \ +} \ + \ +static void kyber_##name##_rqs_stop(struct seq_file *m, void *v) \ + __releases(&khd->lock) \ +{ \ + struct blk_mq_hw_ctx *hctx = m->private; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + \ + spin_unlock(&khd->lock); \ +} \ + \ +static const struct seq_operations kyber_##name##_rqs_seq_ops = { \ + .start = kyber_##name##_rqs_start, \ + .next = kyber_##name##_rqs_next, \ + .stop = kyber_##name##_rqs_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ +{ \ + struct blk_mq_hw_ctx *hctx = data; \ + struct kyber_hctx_data *khd = hctx->sched_data; \ + wait_queue_t *wait = &khd->domain_wait[domain]; \ + \ + seq_printf(m, "%d\n", !list_empty_careful(&wait->task_list)); \ + return 0; \ +} +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) +KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) +#undef KYBER_DEBUGFS_DOMAIN_ATTRS + +static int kyber_async_depth_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct kyber_queue_data *kqd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", kqd->async_depth); + return 0; +} + +static int kyber_cur_domain_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct 
kyber_hctx_data *khd = hctx->sched_data; + + switch (khd->cur_domain) { + case KYBER_READ: + seq_puts(m, "READ\n"); + break; + case KYBER_SYNC_WRITE: + seq_puts(m, "SYNC_WRITE\n"); + break; + case KYBER_OTHER: + seq_puts(m, "OTHER\n"); + break; + default: + seq_printf(m, "%u\n", khd->cur_domain); + break; + } + return 0; +} + +static int kyber_batching_show(void *data, struct seq_file *m) +{ + struct blk_mq_hw_ctx *hctx = data; + struct kyber_hctx_data *khd = hctx->sched_data; + + seq_printf(m, "%u\n", khd->batching); + return 0; +} + +#define KYBER_QUEUE_DOMAIN_ATTRS(name) \ + {#name "_tokens", 0400, kyber_##name##_tokens_show} +static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { + KYBER_QUEUE_DOMAIN_ATTRS(read), + KYBER_QUEUE_DOMAIN_ATTRS(sync_write), + KYBER_QUEUE_DOMAIN_ATTRS(other), + {"async_depth", 0400, kyber_async_depth_show}, + {}, +}; +#undef KYBER_QUEUE_DOMAIN_ATTRS + +#define KYBER_HCTX_DOMAIN_ATTRS(name) \ + {#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \ + {#name "_waiting", 0400, kyber_##name##_waiting_show} +static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { + KYBER_HCTX_DOMAIN_ATTRS(read), + KYBER_HCTX_DOMAIN_ATTRS(sync_write), + KYBER_HCTX_DOMAIN_ATTRS(other), + {"cur_domain", 0400, kyber_cur_domain_show}, + {"batching", 0400, kyber_batching_show}, + {}, +}; +#undef KYBER_HCTX_DOMAIN_ATTRS +#endif + static struct elevator_type kyber_sched = { .ops.mq = { .init_sched = kyber_init_sched, @@ -696,6 +822,10 @@ static struct elevator_type kyber_sched = { .has_work = kyber_has_work, }, .uses_mq = true, +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = kyber_queue_debugfs_attrs, + .hctx_debugfs_attrs = kyber_hctx_debugfs_attrs, +#endif .elevator_attrs = kyber_sched_attrs, .elevator_name = "kyber", .elevator_owner = THIS_MODULE, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 236121633ca0..1b964a387afe 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -19,6 +19,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-debugfs.h" #include "blk-mq-tag.h" #include "blk-mq-sched.h" @@ -517,6 +518,125 @@ static struct elv_fs_entry deadline_attrs[] = { __ATTR_NULL }; +#ifdef CONFIG_BLK_DEBUG_FS +#define DEADLINE_DEBUGFS_DDIR_ATTRS(ddir, name) \ +static void *deadline_##name##_fifo_start(struct seq_file *m, \ + loff_t *pos) \ + __acquires(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_lock(&dd->lock); \ + return seq_list_start(&dd->fifo_list[ddir], *pos); \ +} \ + \ +static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + return seq_list_next(v, &dd->fifo_list[ddir], pos); \ +} \ + \ +static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ + __releases(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_unlock(&dd->lock); \ +} \ + \ +static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ + .start = deadline_##name##_fifo_start, \ + .next = deadline_##name##_fifo_next, \ + .stop = deadline_##name##_fifo_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int deadline_##name##_next_rq_show(void *data, \ + struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct request *rq = 
dd->next_rq[ddir]; \ + \ + if (rq) \ + __blk_mq_debugfs_rq_show(m, rq); \ + return 0; \ +} +DEADLINE_DEBUGFS_DDIR_ATTRS(READ, read) +DEADLINE_DEBUGFS_DDIR_ATTRS(WRITE, write) +#undef DEADLINE_DEBUGFS_DDIR_ATTRS + +static int deadline_batching_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->batching); + return 0; +} + +static int deadline_starved_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->starved); + return 0; +} + +static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos) + __acquires(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_lock(&dd->lock); + return seq_list_start(&dd->dispatch, *pos); +} + +static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + return seq_list_next(v, &dd->dispatch, pos); +} + +static void deadline_dispatch_stop(struct seq_file *m, void *v) + __releases(&dd->lock) +{ + struct request_queue *q = m->private; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_unlock(&dd->lock); +} + +static const struct seq_operations deadline_dispatch_seq_ops = { + .start = deadline_dispatch_start, + .next = deadline_dispatch_next, + .stop = deadline_dispatch_stop, + .show = blk_mq_debugfs_rq_show, +}; + +#define DEADLINE_QUEUE_DDIR_ATTRS(name) \ + {#name "_fifo_list", 0400, .seq_ops = &deadline_##name##_fifo_seq_ops}, \ + {#name "_next_rq", 0400, deadline_##name##_next_rq_show} +static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { + DEADLINE_QUEUE_DDIR_ATTRS(read), + DEADLINE_QUEUE_DDIR_ATTRS(write), + {"batching", 0400, deadline_batching_show}, + {"starved", 0400, deadline_starved_show}, + {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops}, + {}, +}; +#undef DEADLINE_QUEUE_DDIR_ATTRS +#endif + static struct elevator_type mq_deadline = { .ops.mq = { .insert_requests = dd_insert_requests, @@ -533,6 +653,9 @@ static struct elevator_type mq_deadline = { }, .uses_mq = true, +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = deadline_queue_debugfs_attrs, +#endif .elevator_attrs = deadline_attrs, .elevator_name = "mq-deadline", .elevator_owner = THIS_MODULE, diff --git a/block/partition-generic.c b/block/partition-generic.c index 0171a2faad68..c5ec8246e25e 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -16,7 +16,6 @@ #include <linux/kmod.h> #include <linux/ctype.h> #include <linux/genhd.h> -#include <linux/dax.h> #include <linux/blktrace_api.h> #include "partitions/check.h" @@ -321,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } @@ -630,24 +631,12 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) return 0; } -static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) -{ - struct address_space *mapping = bdev->bd_inode->i_mapping; - - return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), - NULL); -} - unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) { + struct 
address_space *mapping = bdev->bd_inode->i_mapping; struct page *page; - /* don't populate page cache for dax capable devices */ - if (IS_DAX(bdev->bd_inode)) - page = read_dax_sector(bdev, n); - else - page = read_pagecache_sector(bdev, n); - + page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); if (!IS_ERR(page)) { if (PageError(page)) goto fail; diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 93e7c1b32edd..5610cd537da7 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state, continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); + if (memcmp(flavour, "bsd\0", 4) == 0) + bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; |
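Note on the blk_stat_add() hunk above: swapping this_cpu_ptr() for the get_cpu_ptr()/put_cpu_ptr() pair makes the per-CPU bucket update safe against preemption, since the plain accessor gives no guarantee that the task stays on the same CPU between fetching the pointer and adding the sample. A minimal, self-contained sketch of that accessor pattern follows; the demo_bytes counter and demo_account() helper are hypothetical and only illustrate the API, they are not part of the patch.

#include <linux/types.h>
#include <linux/percpu.h>

/* hypothetical per-CPU counter, used only to show the accessor pattern */
static DEFINE_PER_CPU(u64, demo_bytes);

static void demo_account(u64 value)
{
	u64 *ctr;

	/*
	 * get_cpu_ptr() disables preemption and returns this CPU's slot, so
	 * the task cannot migrate between taking the pointer and updating
	 * the value; put_cpu_ptr() re-enables preemption afterwards.
	 */
	ctr = get_cpu_ptr(&demo_bytes);
	*ctr += value;
	put_cpu_ptr(&demo_bytes);
}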
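On the tg_bps_limit()/tg_iops_limit() hunks: while the low limit is in effect, a zero setting no longer silently falls back to the max limit. An intermediate cgroup, or a leaf that has the other resource's low limit configured, is treated as unlimited (U64_MAX / UINT_MAX); otherwise the group gets the new floors MIN_THROTL_BPS (320 * 1024 bytes/s, i.e. 320 KiB/s) and MIN_THROTL_IOPS (10 IOs/s). The reworked tg_conf_updated() walk also keeps the hierarchy consistent by giving every child at most its parent's idle-time threshold and at least its parent's latency target (both in microseconds): for example, with a parent configured as idle=1000 latency=50 and a child configured as idle=5000 latency=10, the child's effective values become idletime_threshold=1000 and latency_target=50.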
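The rewritten throtl_tg_is_idle() ties the single-idle window to the group's own threshold instead of the queue's throttle slice. With idletime_threshold = 1000 us, for instance, time = min(MAX_IDLE_TIME, 4 * 1000) = 4000 us, so the group counts as idle if it has not completed an I/O for more than 4 ms, if its average think time exceeds 1 ms, or, when a latency target is set, if fewer than one in five of its bios exceeded that target; a group still at the defaults (latency_target == DFL_LATENCY_TARGET or idletime_threshold == DFL_IDLE_THRESHOLD) is always treated as idle. The added throtl_log() call exposes exactly these inputs, which is what the avg_idle, idle_threshold, bad_bio and total_bio fields in the log format refer to.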
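The kyber and mq-deadline additions both hang scheduler-private debugfs files off the queue_debugfs_attrs/hctx_debugfs_attrs pointers in struct elevator_type, guarded by CONFIG_BLK_DEBUG_FS. Below is a minimal sketch of what one such queue-level attribute looks like; struct demo_queue_data, demo_depth_show() and the "depth" file are hypothetical stand-ins, while the initializer shapes ({name, mode, show} plus the empty terminating entry) are the ones the kyber and mq-deadline tables above rely on. It assumes it is built inside block/ so the local blk-mq-debugfs.h header is available.

#include <linux/seq_file.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include "blk-mq-debugfs.h"

/* hypothetical per-queue scheduler data, for illustration only */
struct demo_queue_data {
	unsigned int depth;
};

/* show callback: 'data' is the request_queue for queue-level attributes */
static int demo_depth_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	struct demo_queue_data *dqd = q->elevator->elevator_data;

	seq_printf(m, "%u\n", dqd->depth);
	return 0;
}

static const struct blk_mq_debugfs_attr demo_queue_debugfs_attrs[] = {
	{"depth", 0400, demo_depth_show},
	{},	/* terminating sentinel, as in the kyber/mq-deadline tables */
};

A scheduler would then point .queue_debugfs_attrs at this table in its struct elevator_type, exactly as the kyber and mq-deadline hunks do, and the blk-mq debugfs code creates the files for whichever scheduler is attached.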