Diffstat (limited to 'block')
-rw-r--r--   block/Kconfig           |   1
-rw-r--r--   block/bfq-iosched.c     |   2
-rw-r--r--   block/bio.c             |  38
-rw-r--r--   block/blk-cgroup.c      |   2
-rw-r--r--   block/blk-core.c        |  94
-rw-r--r--   block/blk-mq-cpumap.c   |  10
-rw-r--r--   block/blk-mq-debugfs.c  | 148
-rw-r--r--   block/blk-mq-debugfs.h  |  36
-rw-r--r--   block/blk-mq-pci.c      |   2
-rw-r--r--   block/blk-mq-rdma.c     |   4
-rw-r--r--   block/blk-mq-sched.c    |  29
-rw-r--r--   block/blk-mq-sched.h    |   1
-rw-r--r--   block/blk-mq-virtio.c   |   4
-rw-r--r--   block/blk-mq.c          |   5
-rw-r--r--   block/blk-rq-qos.c      |   7
-rw-r--r--   block/blk-sysfs.c       |  47
-rw-r--r--   block/blk-throttle.c    |   2
-rw-r--r--   block/blk.h             |  11
-rw-r--r--   block/bsg-lib.c         |   1
-rw-r--r--   block/elevator.c        |   2
-rw-r--r--   block/genhd.c           |   4
-rw-r--r--   block/partitions/ldm.c  |   2
22 files changed, 196 insertions, 256 deletions
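Of note in the bio.c changes below, __bio_try_merge_page() now reports through a bool *same_page out-parameter instead of taking a bool flag; callers pass a local variable and act on it after the call. The following is a minimal sketch of the new calling convention, modeled on the bio_add_page() hunk in this diff (the helper name add_page_example is illustrative only, and error handling beyond what the hunk shows is omitted):

```c
#include <linux/bio.h>

/* Sketch: adding a page with the reworked __bio_try_merge_page() API. */
static int add_page_example(struct bio *bio, struct page *page,
			    unsigned int len, unsigned int offset)
{
	/* Set by __bio_try_merge_page() when the data merged into the
	 * same physical page as the last bvec (iov_iter callers use this
	 * to drop their extra page reference). */
	bool same_page = false;

	if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
		if (bio_full(bio))
			return 0;	/* no room for another bvec */
		__bio_add_page(bio, page, len, offset);
	}
	return len;
}
```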
diff --git a/block/Kconfig b/block/Kconfig index 78374cb03114..56cb1695cd87 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -73,6 +73,7 @@ config BLK_DEV_INTEGRITY config BLK_DEV_ZONED bool "Zoned block device support" + select MQ_IOSCHED_DEADLINE ---help--- Block layer zoned block device support. This option enables support for ZAC/ZBC host-managed and host-aware zoned block devices. diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f8d430f88d25..f9269ae6da9c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -240,7 +240,7 @@ static struct kmem_cache *bfq_pool; * containing only random (seeky) I/O are prevented from being tagged * as soft real-time. */ -#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1) +#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 diff --git a/block/bio.c b/block/bio.c index 683cbb40f051..ce797d73bb43 100644 --- a/block/bio.c +++ b/block/bio.c @@ -636,7 +636,7 @@ EXPORT_SYMBOL(bio_clone_fast); static inline bool page_is_mergeable(const struct bio_vec *bv, struct page *page, unsigned int len, unsigned int off, - bool same_page) + bool *same_page) { phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) + bv->bv_offset + bv->bv_len - 1; @@ -647,15 +647,9 @@ static inline bool page_is_mergeable(const struct bio_vec *bv, if (xen_domain() && !xen_biovec_phys_mergeable(bv, page)) return false; - if ((vec_end_addr & PAGE_MASK) != page_addr) { - if (same_page) - return false; - if (pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page) - return false; - } - - WARN_ON_ONCE(same_page && (len + off) > PAGE_SIZE); - + *same_page = ((vec_end_addr & PAGE_MASK) == page_addr); + if (!*same_page && pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page) + return false; return true; } @@ -701,6 +695,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, bool put_same_page) { struct bio_vec *bvec; + bool same_page = false; /* * cloned bio must not modify vec list @@ -729,7 +724,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, if (bvec_gap_to_prev(q, bvec, offset)) return 0; - if (page_is_mergeable(bvec, page, len, offset, false) && + if (page_is_mergeable(bvec, page, len, offset, &same_page) && can_add_page_to_seg(q, bvec, page, len, offset)) { bvec->bv_len += len; goto done; @@ -767,8 +762,7 @@ EXPORT_SYMBOL(bio_add_pc_page); * @page: start page to add * @len: length of the data to add * @off: offset of the data relative to @page - * @same_page: if %true only merge if the new data is in the same physical - * page as the last segment of the bio. + * @same_page: return if the segment has been merged inside the same page * * Try to add the data at @page + @off to the last bvec of @bio. This is a * a useful optimisation for file systems with a block size smaller than the @@ -779,7 +773,7 @@ EXPORT_SYMBOL(bio_add_pc_page); * Return %true on success or %false on failure. 
*/ bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off, bool same_page) + unsigned int len, unsigned int off, bool *same_page) { if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) return false; @@ -837,7 +831,9 @@ EXPORT_SYMBOL_GPL(__bio_add_page); int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - if (!__bio_try_merge_page(bio, page, len, offset, false)) { + bool same_page = false; + + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { if (bio_full(bio)) return 0; __bio_add_page(bio, page, len, offset); @@ -900,6 +896,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; + bool same_page = false; ssize_t size, left; unsigned len, i; size_t offset; @@ -920,8 +917,15 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) struct page *page = pages[i]; len = min_t(size_t, PAGE_SIZE - offset, left); - if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len)) - return -EINVAL; + + if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { + if (same_page) + put_page(page); + } else { + if (WARN_ON_ONCE(bio_full(bio))) + return -EINVAL; + __bio_add_page(bio, page, len, offset); + } offset = 0; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b97b479e4f64..1f7127b03490 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -881,7 +881,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg_free(new_blkg); } else { blkg = blkg_create(pos, q, new_blkg); - if (unlikely(IS_ERR(blkg))) { + if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); goto fail_unlock; } diff --git a/block/blk-core.c b/block/blk-core.c index 1bf83a0df0f6..8340f69670d8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -282,35 +282,6 @@ void blk_set_queue_dying(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_set_queue_dying); -/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ -void blk_exit_queue(struct request_queue *q) -{ - /* - * Since the I/O scheduler exit code may access cgroup information, - * perform I/O scheduler exit before disassociating from the block - * cgroup controller. - */ - if (q->elevator) { - ioc_clear_queue(q); - elevator_exit(q, q->elevator); - q->elevator = NULL; - } - - /* - * Remove all references to @q from the block cgroup controller before - * restoring @q->queue_lock to avoid that restoring this pointer causes - * e.g. blkcg_print_blkgs() to crash. - */ - blkcg_exit_queue(q); - - /* - * Since the cgroup code may dereference the @q->backing_dev_info - * pointer, only decrease its reference count after having removed the - * association with the block cgroup controller. - */ - bdi_put(q->backing_dev_info); -} - /** * blk_cleanup_queue - shutdown a request queue * @q: request queue to shutdown @@ -346,17 +317,22 @@ void blk_cleanup_queue(struct request_queue *q) del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer); blk_sync_queue(q); - /* - * I/O scheduler exit is only safe after the sysfs scheduler attribute - * has been removed. - */ - WARN_ON_ONCE(q->kobj.state_in_sysfs); - - blk_exit_queue(q); - if (queue_is_mq(q)) blk_mq_exit_queue(q); + /* + * In theory, request pool of sched_tags belongs to request queue. + * However, the current implementation requires tag_set for freeing + * requests, so free the pool now. 
+ * + * Queue has become frozen, there can't be any in-queue requests, so + * it is safe to free requests now. + */ + mutex_lock(&q->sysfs_lock); + if (q->elevator) + blk_mq_sched_free_requests(q); + mutex_unlock(&q->sysfs_lock); + percpu_ref_exit(&q->q_usage_counter); /* @q is and will stay empty, shutdown and put */ @@ -994,22 +970,8 @@ blk_qc_t generic_make_request(struct bio *bio) * yet. */ struct bio_list bio_list_on_stack[2]; - blk_mq_req_flags_t flags = 0; - struct request_queue *q = bio->bi_disk->queue; blk_qc_t ret = BLK_QC_T_NONE; - if (bio->bi_opf & REQ_NOWAIT) - flags = BLK_MQ_REQ_NOWAIT; - if (bio_flagged(bio, BIO_QUEUE_ENTERED)) - blk_queue_enter_live(q); - else if (blk_queue_enter(q, flags) < 0) { - if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT)) - bio_wouldblock_error(bio); - else - bio_io_error(bio); - return ret; - } - if (!generic_make_request_checks(bio)) goto out; @@ -1046,22 +1008,11 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; do { - bool enter_succeeded = true; - - if (unlikely(q != bio->bi_disk->queue)) { - if (q) - blk_queue_exit(q); - q = bio->bi_disk->queue; - flags = 0; - if (bio->bi_opf & REQ_NOWAIT) - flags = BLK_MQ_REQ_NOWAIT; - if (blk_queue_enter(q, flags) < 0) { - enter_succeeded = false; - q = NULL; - } - } + struct request_queue *q = bio->bi_disk->queue; + blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ? + BLK_MQ_REQ_NOWAIT : 0; - if (enter_succeeded) { + if (likely(blk_queue_enter(q, flags) == 0)) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ @@ -1069,6 +1020,8 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); + blk_queue_exit(q); + /* sort new bios into those for a lower level * and those for the same level */ @@ -1095,8 +1048,6 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = NULL; /* deactivate */ out: - if (q) - blk_queue_exit(q); return ret; } EXPORT_SYMBOL(generic_make_request); @@ -1200,7 +1151,9 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, struct request *rq) { if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) { - printk(KERN_ERR "%s: over max size limit.\n", __func__); + printk(KERN_ERR "%s: over max size limit. (%u > %u)\n", + __func__, blk_rq_sectors(rq), + blk_queue_get_max_sectors(q, req_op(rq))); return -EIO; } @@ -1212,7 +1165,8 @@ static int blk_cloned_rq_check_limits(struct request_queue *q, */ blk_recalc_rq_segments(rq); if (rq->nr_phys_segments > queue_max_segments(q)) { - printk(KERN_ERR "%s: over max segments limit.\n", __func__); + printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n", + __func__, rq->nr_phys_segments, queue_max_segments(q)); return -EIO; } diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 48bebf00a5f3..f945621a0e8f 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -42,8 +42,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap) /* * First do sequential mapping between CPUs and queues. * In case we still have CPUs to map, and we have some number of - * threads per cores then map sibling threads to the same queue for - * performace optimizations. + * threads per cores then map sibling threads to the same queue + * for performance optimizations. 
*/ if (cpu < nr_queues) { map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu); @@ -60,7 +60,11 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap) } EXPORT_SYMBOL_GPL(blk_mq_map_queues); -/* +/** + * blk_mq_hw_queue_to_node - Look up the memory node for a hardware queue index + * @qmap: CPU to hardware queue map. + * @index: hardware queue index. + * * We have no quick way of doing reverse lookups. This is only used at * queue init time, so runtime isn't important. */ diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 6aea0ebc3a73..3afe327f816f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -821,38 +821,28 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {}, }; -static bool debugfs_create_files(struct dentry *parent, void *data, +static void debugfs_create_files(struct dentry *parent, void *data, const struct blk_mq_debugfs_attr *attr) { if (IS_ERR_OR_NULL(parent)) - return false; + return; d_inode(parent)->i_private = data; - for (; attr->name; attr++) { - if (!debugfs_create_file(attr->name, attr->mode, parent, - (void *)attr, &blk_mq_debugfs_fops)) - return false; - } - return true; + for (; attr->name; attr++) + debugfs_create_file(attr->name, attr->mode, parent, + (void *)attr, &blk_mq_debugfs_fops); } -int blk_mq_debugfs_register(struct request_queue *q) +void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; - if (!blk_debugfs_root) - return -ENOENT; - q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - if (!q->debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(q->debugfs_dir, q, - blk_mq_debugfs_queue_attrs)) - goto err; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs); /* * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir @@ -864,11 +854,10 @@ int blk_mq_debugfs_register(struct request_queue *q) /* Similarly, blk_mq_init_hctx() couldn't do this previously. 
*/ queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) - goto err; - if (q->elevator && !hctx->sched_debugfs_dir && - blk_mq_debugfs_register_sched_hctx(q, hctx)) - goto err; + if (!hctx->debugfs_dir) + blk_mq_debugfs_register_hctx(q, hctx); + if (q->elevator && !hctx->sched_debugfs_dir) + blk_mq_debugfs_register_sched_hctx(q, hctx); } if (q->rq_qos) { @@ -879,12 +868,6 @@ int blk_mq_debugfs_register(struct request_queue *q) rqos = rqos->next; } } - - return 0; - -err: - blk_mq_debugfs_unregister(q); - return -ENOMEM; } void blk_mq_debugfs_unregister(struct request_queue *q) @@ -894,52 +877,32 @@ void blk_mq_debugfs_unregister(struct request_queue *q) q->debugfs_dir = NULL; } -static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx) +static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *ctx) { struct dentry *ctx_dir; char name[20]; snprintf(name, sizeof(name), "cpu%u", ctx->cpu); ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir); - if (!ctx_dir) - return -ENOMEM; - if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs)) - return -ENOMEM; - - return 0; + debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs); } -int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct blk_mq_ctx *ctx; char name[20]; int i; - if (!q->debugfs_dir) - return -ENOENT; - snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); - if (!hctx->debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(hctx->debugfs_dir, hctx, - blk_mq_debugfs_hctx_attrs)) - goto err; - - hctx_for_each_ctx(hctx, ctx, i) { - if (blk_mq_debugfs_register_ctx(hctx, ctx)) - goto err; - } + debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs); - return 0; - -err: - blk_mq_debugfs_unregister_hctx(hctx); - return -ENOMEM; + hctx_for_each_ctx(hctx, ctx, i) + blk_mq_debugfs_register_ctx(hctx, ctx); } void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -949,17 +912,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) hctx->debugfs_dir = NULL; } -int blk_mq_debugfs_register_hctxs(struct request_queue *q) +void blk_mq_debugfs_register_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { - if (blk_mq_debugfs_register_hctx(q, hctx)) - return -ENOMEM; - } - - return 0; + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_debugfs_register_hctx(q, hctx); } void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -971,29 +930,23 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) blk_mq_debugfs_unregister_hctx(hctx); } -int blk_mq_debugfs_register_sched(struct request_queue *q) +void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type; + /* + * If the parent directory has not been created yet, return, we will be + * called again later on and the directory/files will be created then. 
+ */ if (!q->debugfs_dir) - return -ENOENT; + return; if (!e->queue_debugfs_attrs) - return 0; + return; q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir); - if (!q->sched_debugfs_dir) - return -ENOMEM; - - if (!debugfs_create_files(q->sched_debugfs_dir, q, - e->queue_debugfs_attrs)) - goto err; - return 0; - -err: - blk_mq_debugfs_unregister_sched(q); - return -ENOMEM; + debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs); } void blk_mq_debugfs_unregister_sched(struct request_queue *q) @@ -1008,36 +961,22 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) rqos->debugfs_dir = NULL; } -int blk_mq_debugfs_register_rqos(struct rq_qos *rqos) +void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id); - if (!q->debugfs_dir) - return -ENOENT; - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) - return 0; + return; - if (!q->rqos_debugfs_dir) { + if (!q->rqos_debugfs_dir) q->rqos_debugfs_dir = debugfs_create_dir("rqos", q->debugfs_dir); - if (!q->rqos_debugfs_dir) - return -ENOMEM; - } rqos->debugfs_dir = debugfs_create_dir(dir_name, rqos->q->rqos_debugfs_dir); - if (!rqos->debugfs_dir) - return -ENOMEM; - if (!debugfs_create_files(rqos->debugfs_dir, rqos, - rqos->ops->debugfs_attrs)) - goto err; - return 0; - err: - blk_mq_debugfs_unregister_rqos(rqos); - return -ENOMEM; + debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); } void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) @@ -1046,27 +985,18 @@ void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) q->rqos_debugfs_dir = NULL; } -int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { struct elevator_type *e = q->elevator->type; - if (!hctx->debugfs_dir) - return -ENOENT; - if (!e->hctx_debugfs_attrs) - return 0; + return; hctx->sched_debugfs_dir = debugfs_create_dir("sched", hctx->debugfs_dir); - if (!hctx->sched_debugfs_dir) - return -ENOMEM; - - if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx, - e->hctx_debugfs_attrs)) - return -ENOMEM; - - return 0; + debugfs_create_files(hctx->sched_debugfs_dir, hctx, + e->hctx_debugfs_attrs); } void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 8c9012a578c1..a68aa6041a10 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -18,74 +18,68 @@ struct blk_mq_debugfs_attr { int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); -int blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_register(struct request_queue *q); void blk_mq_debugfs_unregister(struct request_queue *q); -int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx); +void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); -int blk_mq_debugfs_register_hctxs(struct request_queue *q); +void blk_mq_debugfs_register_hctxs(struct request_queue *q); void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); -int blk_mq_debugfs_register_sched(struct request_queue *q); +void blk_mq_debugfs_register_sched(struct request_queue *q); void blk_mq_debugfs_unregister_sched(struct request_queue *q); -int 
blk_mq_debugfs_register_sched_hctx(struct request_queue *q, +void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx); -int blk_mq_debugfs_register_rqos(struct rq_qos *rqos); +void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else -static inline int blk_mq_debugfs_register(struct request_queue *q) +static inline void blk_mq_debugfs_register(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { - return 0; } static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { } -static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) +static inline void blk_mq_debugfs_register_hctxs(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_sched(struct request_queue *q) +static inline void blk_mq_debugfs_register_sched(struct request_queue *q) { - return 0; } static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q) { } -static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q, - struct blk_mq_hw_ctx *hctx) +static inline void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, + struct blk_mq_hw_ctx *hctx) { - return 0; } static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { } -static inline int blk_mq_debugfs_register_rqos(struct rq_qos *rqos) +static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) { - return 0; } static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index ad4545a2a98b..b595a94c4d16 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -13,7 +13,7 @@ /** * blk_mq_pci_map_queues - provide a default queue mapping for PCI device - * @set: tagset to provide the mapping for + * @qmap: CPU to hardware queue map. * @pdev: PCI device associated with @set. * @offset: Offset to use for the pci irq vector * diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c index cc921e6ba709..14f968e58b8f 100644 --- a/block/blk-mq-rdma.c +++ b/block/blk-mq-rdma.c @@ -8,8 +8,8 @@ /** * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device - * @set: tagset to provide the mapping for - * @dev: rdma device associated with @set. + * @map: CPU to hardware queue map. + * @dev: rdma device to provide a mapping for. 
* @first_vec: first interrupt vectors to use for queues (usually 0) * * This function assumes the rdma device @dev has at least as many available diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 74c6bb871f7e..2766066a15db 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -475,14 +475,18 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, return ret; } +/* called in queue's release handler, tagset has gone away */ static void blk_mq_sched_tags_teardown(struct request_queue *q) { - struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_free_tags(set, hctx, i); + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->sched_tags) { + blk_mq_free_rq_map(hctx->sched_tags); + hctx->sched_tags = NULL; + } + } } int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) @@ -523,6 +527,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) ret = e->ops.init_hctx(hctx, i); if (ret) { eq = q->elevator; + blk_mq_sched_free_requests(q); blk_mq_exit_sched(q, eq); kobject_put(&eq->kobj); return ret; @@ -534,11 +539,29 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return 0; err: + blk_mq_sched_free_requests(q); blk_mq_sched_tags_teardown(q); q->elevator = NULL; return ret; } +/* + * called in either blk_queue_cleanup or elevator_switch, tagset + * is required for freeing requests + */ +void blk_mq_sched_free_requests(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + lockdep_assert_held(&q->sysfs_lock); + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->sched_tags) + blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i); + } +} + void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) { struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index c7bdb52367ac..3cf92cbbd8ac 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -28,6 +28,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); +void blk_mq_sched_free_requests(struct request_queue *q); static inline bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c index 75a52c18a8f6..488341628256 100644 --- a/block/blk-mq-virtio.c +++ b/block/blk-mq-virtio.c @@ -11,8 +11,8 @@ /** * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device - * @set: tagset to provide the mapping for - * @vdev: virtio device associated with @set. + * @qmap: CPU to hardware queue map. + * @vdev: virtio device to provide a mapping for. 
* @first_vec: first interrupt vectors to use for queues (usually 0) * * This function assumes the virtio device @vdev has at least as many available diff --git a/block/blk-mq.c b/block/blk-mq.c index 32b8ad3d341b..ce0f5f4ede70 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2865,7 +2865,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_exit; if (blk_mq_alloc_ctxs(q)) - goto err_exit; + goto err_poll; /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); @@ -2929,6 +2929,9 @@ err_hctxs: kfree(q->queue_hw_ctx); err_sys_init: blk_mq_sysfs_deinit(q); +err_poll: + blk_stat_free_callback(q->poll_cb); + q->poll_cb = NULL; err_exit: q->mq_ops = NULL; return ERR_PTR(-ENOMEM); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 3f55b56f24bc..659ccb8b693f 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -209,9 +209,10 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, /** * rq_qos_wait - throttle on a rqw if we need to - * @private_data - caller provided specific data - * @acquire_inflight_cb - inc the rqw->inflight counter if we can - * @cleanup_cb - the callback to cleanup in case we race with a waker + * @rqw: rqw to throttle on + * @private_data: caller provided specific data + * @acquire_inflight_cb: inc the rqw->inflight counter if we can + * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a16a02c52a85..977c659dcd18 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -840,6 +840,36 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) kmem_cache_free(blk_requestq_cachep, q); } +/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */ +static void blk_exit_queue(struct request_queue *q) +{ + /* + * Since the I/O scheduler exit code may access cgroup information, + * perform I/O scheduler exit before disassociating from the block + * cgroup controller. + */ + if (q->elevator) { + ioc_clear_queue(q); + __elevator_exit(q, q->elevator); + q->elevator = NULL; + } + + /* + * Remove all references to @q from the block cgroup controller before + * restoring @q->queue_lock to avoid that restoring this pointer causes + * e.g. blkcg_print_blkgs() to crash. + */ + blkcg_exit_queue(q); + + /* + * Since the cgroup code may dereference the @q->backing_dev_info + * pointer, only decrease its reference count after having removed the + * association with the block cgroup controller. + */ + bdi_put(q->backing_dev_info); +} + + /** * __blk_release_queue - release a request queue * @work: pointer to the release_work member of the request queue to be released @@ -860,23 +890,10 @@ static void __blk_release_queue(struct work_struct *work) blk_stat_remove_callback(q, q->poll_cb); blk_stat_free_callback(q->poll_cb); - if (!blk_queue_dead(q)) { - /* - * Last reference was dropped without having called - * blk_cleanup_queue(). 
- */ - WARN_ONCE(blk_queue_init_done(q), - "request queue %p has been registered but blk_cleanup_queue() has not been called for that queue\n", - q); - blk_exit_queue(q); - } - - WARN(blk_queue_root_blkg(q), - "request queue %p is being released but it has not yet been removed from the blkcg controller\n", - q); - blk_free_queue_stats(q->stats); + blk_exit_queue(q); + blk_queue_free_zone_bitmaps(q); if (queue_is_mq(q)) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1b97a73d2fb1..9ea7c0ecad10 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1220,7 +1220,7 @@ static bool throtl_can_upgrade(struct throtl_data *td, struct throtl_grp *this_tg); /** * throtl_pending_timer_fn - timer function for service_queue->pending_timer - * @arg: the throtl_service_queue being serviced + * @t: the pending_timer member of the throtl_service_queue being serviced * * This timer is armed when a child throtl_grp with active bio's become * pending and queued on the service_queue's pending_tree and expires when diff --git a/block/blk.h b/block/blk.h index e27fd1512e4b..7814aa207153 100644 --- a/block/blk.h +++ b/block/blk.h @@ -6,6 +6,7 @@ #include <linux/blk-mq.h> #include <xen/xen.h> #include "blk-mq.h" +#include "blk-mq-sched.h" /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -50,7 +51,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); -void blk_exit_queue(struct request_queue *q); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); void blk_freeze_queue(struct request_queue *q); @@ -177,10 +177,17 @@ void blk_insert_flush(struct request *rq); int elevator_init_mq(struct request_queue *q); int elevator_switch_mq(struct request_queue *q, struct elevator_type *new_e); -void elevator_exit(struct request_queue *, struct elevator_queue *); +void __elevator_exit(struct request_queue *, struct elevator_queue *); int elv_register_queue(struct request_queue *q); void elv_unregister_queue(struct request_queue *q); +static inline void elevator_exit(struct request_queue *q, + struct elevator_queue *e) +{ + blk_mq_sched_free_requests(q); + __elevator_exit(q, e); +} + struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); #ifdef CONFIG_FAIL_IO_TIMEOUT diff --git a/block/bsg-lib.c b/block/bsg-lib.c index b898a1cdf872..785dd58947f1 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -354,6 +354,7 @@ static const struct blk_mq_ops bsg_mq_ops = { * @dev: device to attach bsg device to * @name: device to give bsg device * @job_fn: bsg job handler + * @timeout: timeout handler function pointer * @dd_job_size: size of LLD data needed for each job */ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, diff --git a/block/elevator.c b/block/elevator.c index ec55d5fc0b3e..2f17d66d0e61 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -178,7 +178,7 @@ static void elevator_release(struct kobject *kobj) kfree(e); } -void elevator_exit(struct request_queue *q, struct elevator_queue *e) +void __elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->type->ops.exit_sched) diff --git a/block/genhd.c b/block/genhd.c index ad6826628e79..24654e1d83e6 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -532,8 +532,8 @@ void blk_free_devt(dev_t devt) } } -/** - * We invalidate devt by assigning NULL pointer for devt in idr. 
+/* + * We invalidate devt by assigning NULL pointer for devt in idr. */ void blk_invalidate_devt(dev_t devt) { diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index 6db573f33219..fe5d970e2e60 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -19,7 +19,7 @@ #include "check.h" #include "msdos.h" -/** +/* * ldm_debug/info/error/crit - Output an error message * @f: A printf format string containing the message * @...: Variables to substitute into @f |
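As a usage note on the scheduler-request changes above: blk_mq_sched_free_requests() asserts that q->sysfs_lock is held, so callers outside the new inline elevator_exit() wrapper take the lock first, as blk_cleanup_queue() does in this diff. A hedged sketch of that pattern (the function name example_teardown_sched_requests is illustrative only; surrounding queue teardown is omitted):

```c
#include <linux/blkdev.h>
#include "blk-mq-sched.h"	/* declares blk_mq_sched_free_requests() */

/* Sketch: freeing scheduler-owned requests while the tag_set is still valid. */
static void example_teardown_sched_requests(struct request_queue *q)
{
	/* blk_mq_sched_free_requests() does lockdep_assert_held() on this. */
	mutex_lock(&q->sysfs_lock);
	if (q->elevator)
		blk_mq_sched_free_requests(q);
	mutex_unlock(&q->sysfs_lock);
}
```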