diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-14 01:15:15 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-14 01:15:15 +0300 |
commit | edda415314804c29fa07e538938fa07947012d8f (patch) | |
tree | 0428db94253f73bb0744f52d26645c33830756f3 | |
parent | 3e565a351ed3e94352bfbe0be06c659fc8fafb19 (diff) | |
parent | bb06ec31452fb2da1594f88035c2ecea4e0652f4 (diff) | |
download | linux-edda415314804c29fa07e538938fa07947012d8f.tar.xz |
Merge tag 'for-linus-20180413' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"Followup fixes for this merge window. This contains:
- Series from Ming, fixing corner cases in our CPU <-> queue mapping.
This triggered repeated warnings on especially s390, but I also hit
it in cpu hot plug/unplug testing while doing IO on NVMe on x86-64.
- Another fix from Ming, ensuring that we always order budget and
driver tag identically, avoiding a deadlock on QD=1 devices.
- Loop locking regression fix from this merge window, from Omar.
- Another loop locking fix, this time missing an unlock, from Tetsuo
Handa.
- Fix for racing IO submission with device removal from Bart.
- sr reference fix from me, fixing a case where disk change or
getevents can race with device removal.
- Set of nvme fixes by way of Keith, from various contributors"
* tag 'for-linus-20180413' of git://git.kernel.dk/linux-block: (28 commits)
nvme: expand nvmf_check_if_ready checks
nvme: Use admin command effects for admin commands
nvmet: fix space padding in serial number
nvme: check return value of init_srcu_struct function
nvmet: Fix nvmet_execute_write_zeroes sector count
nvme-pci: Separate IO and admin queue IRQ vectors
nvme-pci: Remove unused queue parameter
nvme-pci: Skip queue deletion if there are no queues
nvme: target: fix buffer overflow
nvme: don't send keep-alives to the discovery controller
nvme: unexport nvme_start_keep_alive
nvme-loop: fix kernel oops in case of unhandled command
nvme: enforce 64bit offset for nvme_get_log_ext fn
sr: get/drop reference to device in revalidate and check_events
blk-mq: Revert "blk-mq: reimplement blk_mq_hw_queue_mapped"
blk-mq: Avoid that submitting a bio concurrently with device removal triggers a crash
backing: silence compiler warning using __printf
blk-mq: remove code for dealing with remapping queue
blk-mq: reimplement blk_mq_hw_queue_mapped
blk-mq: don't check queue mapped in __blk_mq_delay_run_hw_queue()
...
-rw-r--r-- | block/blk-core.c | 35 | ||||
-rw-r--r-- | block/blk-mq-cpumap.c | 5 | ||||
-rw-r--r-- | block/blk-mq-debugfs.c | 1 | ||||
-rw-r--r-- | block/blk-mq.c | 122 | ||||
-rw-r--r-- | drivers/block/loop.c | 45 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 83 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 12 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 4 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 35 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 14 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 1 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 20 | ||||
-rw-r--r-- | drivers/scsi/sr.c | 19 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 1 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 2 |
19 files changed, 245 insertions, 226 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index abcb8684ba67..806ce2442819 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2385,8 +2385,20 @@ blk_qc_t generic_make_request(struct bio *bio) * yet. */ struct bio_list bio_list_on_stack[2]; + blk_mq_req_flags_t flags = 0; + struct request_queue *q = bio->bi_disk->queue; blk_qc_t ret = BLK_QC_T_NONE; + if (bio->bi_opf & REQ_NOWAIT) + flags = BLK_MQ_REQ_NOWAIT; + if (blk_queue_enter(q, flags) < 0) { + if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT)) + bio_wouldblock_error(bio); + else + bio_io_error(bio); + return ret; + } + if (!generic_make_request_checks(bio)) goto out; @@ -2423,11 +2435,22 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; do { - struct request_queue *q = bio->bi_disk->queue; - blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ? - BLK_MQ_REQ_NOWAIT : 0; + bool enter_succeeded = true; + + if (unlikely(q != bio->bi_disk->queue)) { + if (q) + blk_queue_exit(q); + q = bio->bi_disk->queue; + flags = 0; + if (bio->bi_opf & REQ_NOWAIT) + flags = BLK_MQ_REQ_NOWAIT; + if (blk_queue_enter(q, flags) < 0) { + enter_succeeded = false; + q = NULL; + } + } - if (likely(blk_queue_enter(q, flags) == 0)) { + if (enter_succeeded) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ @@ -2435,8 +2458,6 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); - blk_queue_exit(q); - /* sort new bios into those for a lower level * and those for the same level */ @@ -2463,6 +2484,8 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = NULL; /* deactivate */ out: + if (q) + blk_queue_exit(q); return ret; } EXPORT_SYMBOL(generic_make_request); diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 9f8cffc8a701..3eb169f15842 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -16,11 +16,6 @@ static int cpu_to_queue_index(unsigned int nr_queues, const int cpu) { - /* - * Non present CPU will be mapped to queue index 0. - */ - if (!cpu_present(cpu)) - return 0; return cpu % nr_queues; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 58b3b79cbe83..3080e18cb859 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -235,7 +235,6 @@ static const char *const hctx_state_name[] = { HCTX_STATE_NAME(STOPPED), HCTX_STATE_NAME(TAG_ACTIVE), HCTX_STATE_NAME(SCHED_RESTART), - HCTX_STATE_NAME(START_ON_RUN), }; #undef HCTX_STATE_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index f5c7dbcb954f..0dc9e341c2a7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1180,7 +1180,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); - if (!blk_mq_get_driver_tag(rq, &hctx, false)) { + + hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); + if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) + break; + + if (!blk_mq_get_driver_tag(rq, NULL, false)) { /* * The initial allocation attempt failed, so we need to * rerun the hardware queue when a tag is freed. The @@ -1189,8 +1194,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, * we'll re-run it below. */ if (!blk_mq_mark_tag_wait(&hctx, rq)) { - if (got_budget) - blk_mq_put_dispatch_budget(hctx); + blk_mq_put_dispatch_budget(hctx); /* * For non-shared tags, the RESTART check * will suffice. @@ -1201,11 +1205,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, } } - if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) { - blk_mq_put_driver_tag(rq); - break; - } - list_del_init(&rq->queuelist); bd.rq = rq; @@ -1336,6 +1335,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) hctx_unlock(hctx, srcu_idx); } +static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) +{ + int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask); + + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(hctx->cpumask); + return cpu; +} + /* * It'd be great if the workqueue API had a way to pass * in a mask and had some smarts for more clever placement. @@ -1345,26 +1353,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) { bool tried = false; + int next_cpu = hctx->next_cpu; if (hctx->queue->nr_hw_queues == 1) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { - int next_cpu; select_cpu: - next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask, + next_cpu = cpumask_next_and(next_cpu, hctx->cpumask, cpu_online_mask); if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask); - - /* - * No online CPU is found, so have to make sure hctx->next_cpu - * is set correctly for not breaking workqueue. - */ - if (next_cpu >= nr_cpu_ids) - hctx->next_cpu = cpumask_first(hctx->cpumask); - else - hctx->next_cpu = next_cpu; + next_cpu = blk_mq_first_mapped_cpu(hctx); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } @@ -1372,7 +1371,7 @@ select_cpu: * Do unbound schedule if we can't find a online CPU for this hctx, * and it should only happen in the path of handling CPU DEAD. */ - if (!cpu_online(hctx->next_cpu)) { + if (!cpu_online(next_cpu)) { if (!tried) { tried = true; goto select_cpu; @@ -1382,18 +1381,18 @@ select_cpu: * Make sure to re-select CPU next time once after CPUs * in hctx->cpumask become online again. */ + hctx->next_cpu = next_cpu; hctx->next_cpu_batch = 1; return WORK_CPU_UNBOUND; } - return hctx->next_cpu; + + hctx->next_cpu = next_cpu; + return next_cpu; } static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { - if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx))) - return; - if (unlikely(blk_mq_hctx_stopped(hctx))) return; @@ -1560,40 +1559,14 @@ static void blk_mq_run_work_fn(struct work_struct *work) hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); /* - * If we are stopped, don't run the queue. The exception is if - * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear - * the STOPPED bit and run it. + * If we are stopped, don't run the queue. */ - if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) { - if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state)) - return; - - clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state); + if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) clear_bit(BLK_MQ_S_STOPPED, &hctx->state); - } __blk_mq_run_hw_queue(hctx); } - -void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) -{ - if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx))) - return; - - /* - * Stop the hw queue, then modify currently delayed work. - * This should prevent us from running the queue prematurely. - * Mark the queue as auto-clearing STOPPED when it runs. - */ - blk_mq_stop_hw_queue(hctx); - set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state); - kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), - &hctx->run_work, - msecs_to_jiffies(msecs)); -} -EXPORT_SYMBOL(blk_mq_delay_queue); - static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) @@ -1804,11 +1777,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if (q->elevator && !bypass_insert) goto insert; - if (!blk_mq_get_driver_tag(rq, NULL, false)) + if (!blk_mq_get_dispatch_budget(hctx)) goto insert; - if (!blk_mq_get_dispatch_budget(hctx)) { - blk_mq_put_driver_tag(rq); + if (!blk_mq_get_driver_tag(rq, NULL, false)) { + blk_mq_put_dispatch_budget(hctx); goto insert; } @@ -2356,7 +2329,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, static void blk_mq_map_swqueue(struct request_queue *q) { - unsigned int i, hctx_idx; + unsigned int i; struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; @@ -2373,23 +2346,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Map software to hardware queues. - * - * If the cpu isn't present, the cpu is mapped to first hctx. */ for_each_possible_cpu(i) { - hctx_idx = q->mq_map[i]; - /* unmapped hw queue can be remapped after CPU topo changed */ - if (!set->tags[hctx_idx] && - !__blk_mq_alloc_rq_map(set, hctx_idx)) { - /* - * If tags initialization fail for some hctx, - * that hctx won't be brought online. In this - * case, remap the current ctx to hctx[0] which - * is guaranteed to always have tags allocated - */ - q->mq_map[i] = 0; - } - ctx = per_cpu_ptr(q->queue_ctx, i); hctx = blk_mq_map_queue(q, i); @@ -2401,21 +2359,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) mutex_unlock(&q->sysfs_lock); queue_for_each_hw_ctx(q, hctx, i) { - /* - * If no software queues are mapped to this hardware queue, - * disable it and free the request entries. - */ - if (!hctx->nr_ctx) { - /* Never unmap queue 0. We need it as a - * fallback in case of a new remap fails - * allocation - */ - if (i && set->tags[i]) - blk_mq_free_map_and_requests(set, i); - - hctx->tags = NULL; - continue; - } + /* every hctx should get mapped by at least one CPU */ + WARN_ON(!hctx->nr_ctx); hctx->tags = set->tags[i]; WARN_ON(!hctx->tags); @@ -2430,8 +2375,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Initialize batch roundrobin counts */ - hctx->next_cpu = cpumask_first_and(hctx->cpumask, - cpu_online_mask); + hctx->next_cpu = blk_mq_first_mapped_cpu(hctx); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 264abaaff662..c9d04497a415 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1103,11 +1103,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; - if (type >= MAX_LO_CRYPT) - return -EINVAL; + if (type >= MAX_LO_CRYPT) { + err = -EINVAL; + goto exit; + } xfer = xfer_funcs[type]; - if (xfer == NULL) - return -EINVAL; + if (xfer == NULL) { + err = -EINVAL; + goto exit; + } } else xfer = NULL; @@ -1283,12 +1287,13 @@ static int loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { struct loop_info info; struct loop_info64 info64; - int err = 0; + int err; - if (!arg) - err = -EINVAL; - if (!err) - err = loop_get_status(lo, &info64); + if (!arg) { + mutex_unlock(&lo->lo_ctl_mutex); + return -EINVAL; + } + err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_old(&info64, &info); if (!err && copy_to_user(arg, &info, sizeof(info))) @@ -1300,12 +1305,13 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { static int loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { struct loop_info64 info64; - int err = 0; + int err; - if (!arg) - err = -EINVAL; - if (!err) - err = loop_get_status(lo, &info64); + if (!arg) { + mutex_unlock(&lo->lo_ctl_mutex); + return -EINVAL; + } + err = loop_get_status(lo, &info64); if (!err && copy_to_user(arg, &info64, sizeof(info64))) err = -EFAULT; @@ -1529,12 +1535,13 @@ loop_get_status_compat(struct loop_device *lo, struct compat_loop_info __user *arg) { struct loop_info64 info64; - int err = 0; + int err; - if (!arg) - err = -EINVAL; - if (!err) - err = loop_get_status(lo, &info64); + if (!arg) { + mutex_unlock(&lo->lo_ctl_mutex); + return -EINVAL; + } + err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_compat(&info64, arg); return err; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 197a6ba9700f..9df4f71e58ca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -376,6 +376,15 @@ static void nvme_put_ns(struct nvme_ns *ns) kref_put(&ns->kref, nvme_free_ns); } +static inline void nvme_clear_nvme_request(struct request *req) +{ + if (!(req->rq_flags & RQF_DONTPREP)) { + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; + } +} + struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { @@ -392,6 +401,7 @@ struct request *nvme_alloc_request(struct request_queue *q, return req; req->cmd_flags |= REQ_FAILFAST_DRIVER; + nvme_clear_nvme_request(req); nvme_req(req)->cmd = cmd; return req; @@ -608,11 +618,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, { blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + nvme_clear_nvme_request(req); switch (req_op(req)) { case REQ_OP_DRV_IN: @@ -742,6 +748,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -826,7 +833,7 @@ static void nvme_keep_alive_work(struct work_struct *work) } } -void nvme_start_keep_alive(struct nvme_ctrl *ctrl) +static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) { if (unlikely(ctrl->kato == 0)) return; @@ -836,7 +843,6 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl) ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); } -EXPORT_SYMBOL_GPL(nvme_start_keep_alive); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) { @@ -1103,7 +1109,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->iocs[opcode]); + effects = le32_to_cpu(ctrl->effects->acs[opcode]); else effects = nvme_known_admin_effects(opcode); @@ -2220,7 +2226,7 @@ out_unlock: int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 log_page, void *log, - size_t size, size_t offset) + size_t size, u64 offset) { struct nvme_command c = { }; unsigned long dwlen = size / 4 - 1; @@ -2235,8 +2241,8 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.get_log_page.lid = log_page; c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1)); c.get_log_page.numdu = cpu_to_le16(dwlen >> 16); - c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1)); - c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL); + c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset)); + c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset)); return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } @@ -2833,7 +2839,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_free_head; head->instance = ret; INIT_LIST_HEAD(&head->list); - init_srcu_struct(&head->srcu); + ret = init_srcu_struct(&head->srcu); + if (ret) + goto out_ida_remove; head->subsys = ctrl->subsys; head->ns_id = nsid; kref_init(&head->ref); @@ -2855,6 +2863,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); +out_ida_remove: ida_simple_remove(&ctrl->subsys->ns_ida, head->instance); out_free_head: kfree(head); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8f0f34d06d46..124c458806df 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -536,6 +536,85 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( return NULL; } +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live, bool is_connected) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + + if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) + return BLK_STS_OK; + + switch (ctrl->state) { + case NVME_CTRL_DELETING: + goto reject_io; + + case NVME_CTRL_NEW: + case NVME_CTRL_CONNECTING: + if (!is_connected) + /* + * This is the case of starting a new + * association but connectivity was lost + * before it was fully created. We need to + * error the commands used to initialize the + * controller so the reconnect can go into a + * retry attempt. The commands should all be + * marked REQ_FAILFAST_DRIVER, which will hit + * the reject path below. Anything else will + * be queued while the state settles. + */ + goto reject_or_queue_io; + + if ((queue_live && + !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || + (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect)) + /* + * If queue is live, allow only commands that + * are internally generated pass through. These + * are commands on the admin queue to initialize + * the controller. This will reject any ioctl + * admin cmds received while initializing. + * + * If the queue is not live, allow only a + * connect command. This will reject any ioctl + * admin cmd as well as initialization commands + * if the controller reverted the queue to non-live. + */ + return BLK_STS_OK; + + /* + * fall-thru to the reject_or_queue_io clause + */ + break; + + /* these cases fall-thru + * case NVME_CTRL_LIVE: + * case NVME_CTRL_RESETTING: + */ + default: + break; + } + +reject_or_queue_io: + /* + * Any other new io is something we're not in a state to send + * to the device. Default action is to busy it and retry it + * after the controller state is recovered. However, anything + * marked for failfast or nvme multipath is immediately failed. + * Note: commands used to initialize the controller will be + * marked for failfast. + * Note: nvme cli/ioctl commands are marked for failfast. + */ + if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) + return BLK_STS_RESOURCE; + +reject_io: + nvme_req(rq)->status = NVME_SC_ABORT_REQ; + return BLK_STS_IOERR; +} +EXPORT_SYMBOL_GPL(nvmf_check_if_ready); + static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, @@ -608,8 +687,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) + if (opts->discovery_nqn) { + opts->kato = 0; opts->nr_io_queues = 0; + } break; case NVMF_OPT_TRADDR: p = match_strdup(args); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a3145d90c1d2..ef46c915b7b5 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -157,36 +157,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); - -static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, - struct request *rq) -{ - struct nvme_command *cmd = nvme_req(rq)->cmd; - - /* - * We cannot accept any other command until the connect command has - * completed, so only allow connect to pass. - */ - if (!blk_rq_is_passthrough(rq) || - cmd->common.opcode != nvme_fabrics_command || - cmd->fabrics.fctype != nvme_fabrics_type_connect) { - /* - * Connecting state means transport disruption or initial - * establishment, which can take a long time and even might - * fail permanently, fail fast to give upper layers a chance - * to failover. - * Deleting state means that the ctrl will never accept commands - * again, fail it permanently. - */ - if (ctrl->state == NVME_CTRL_CONNECTING || - ctrl->state == NVME_CTRL_DELETING) { - nvme_req(rq)->status = NVME_SC_ABORT_REQ; - return BLK_STS_IOERR; - } - return BLK_STS_RESOURCE; /* try again later */ - } - - return BLK_STS_OK; -} +blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, + struct request *rq, bool queue_live, bool is_connected); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index c6e719b2f3ca..6cb26bcf6ec0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2277,14 +2277,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, return BLK_STS_OK; } -static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, - struct request *rq) -{ - if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) @@ -2300,7 +2292,9 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, u32 data_len; blk_status_t ret; - ret = nvme_fc_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_FC_Q_LIVE, &queue->flags), + ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cf93690b3ffc..061fecfd44f5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -105,6 +105,7 @@ struct nvme_request { enum { NVME_REQ_CANCELLED = (1 << 0), + NVME_REQ_USERCMD = (1 << 1), }; static inline struct nvme_request *nvme_req(struct request *req) @@ -422,7 +423,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, unsigned timeout, int qid, int at_head, blk_mq_req_flags_t flags); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); -void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); @@ -430,7 +430,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - u8 log_page, void *log, size_t size, size_t offset); + u8 log_page, void *log, size_t size, u64 offset); extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 295fbec1e5f2..fbc71fac6f1e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -84,6 +84,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; unsigned online_queues; unsigned max_qid; + unsigned int num_vecs; int q_depth; u32 db_stride; void __iomem *bar; @@ -414,7 +415,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), + dev->num_vecs > 1 ? 1 /* admin queue */ : 0); } /** @@ -1380,8 +1382,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } -static int nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int node) +static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { struct nvme_queue *nvmeq = &dev->queues[qid]; @@ -1457,7 +1458,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) nvmeq->sq_cmds_io = dev->cmb + offset; } - nvmeq->cq_vector = qid - 1; + /* + * A queue's vector matches the queue identifier unless the controller + * has only one vector available. + */ + nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; result = adapter_alloc_cq(dev, qid, nvmeq); if (result < 0) goto release_vector; @@ -1596,8 +1601,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, - dev_to_node(dev->dev)); + result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); if (result) return result; @@ -1630,9 +1634,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { - /* vector == qid - 1, match nvme_create_queue */ - if (nvme_alloc_queue(dev, i, dev->q_depth, - pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { + if (nvme_alloc_queue(dev, i, dev->q_depth)) { ret = -ENOMEM; break; } @@ -1914,6 +1916,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; + struct irq_affinity affd = { + .pre_vectors = 1 + }; + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -1949,11 +1955,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * setting up the full range we need. */ pci_free_irq_vectors(pdev); - nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues, - PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY); - if (nr_io_queues <= 0) + result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); + if (result <= 0) return -EIO; - dev->max_qid = nr_io_queues; + dev->num_vecs = result; + dev->max_qid = max(result - 1, 1); /* * Should investigate if there's a performance win from allocating @@ -2201,7 +2208,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); - if (!dead) { + if (!dead && dev->ctrl.queue_count > 0) { /* * If the controller is still alive tell it to stop using the * host memory buffer. In theory the shutdown / reset should diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 758537e9ba07..1eb4438a8763 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1601,17 +1601,6 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } -/* - * We cannot accept any other command until the Connect command has completed. - */ -static inline blk_status_t -nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) -{ - if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1627,7 +1616,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - ret = nvme_rdma_is_ready(queue, rq); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, + test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 90dcdc40ac71..5e0e9fcc0d4d 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -178,6 +178,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->vid = 0; id->ssvid = 0; + memset(id->sn, ' ', sizeof(id->sn)); bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index a72425d8bce0..231e04e0a496 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -59,7 +59,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE); memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); - memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); + strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); } /* diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 28bbdff4a88b..cd2344179673 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -173,8 +173,8 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) sector = le64_to_cpu(write_zeroes->slba) << (req->ns->blksize_shift - 9); - nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) << - (req->ns->blksize_shift - 9)) + 1; + nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << + (req->ns->blksize_shift - 9)); if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, GFP_KERNEL, &bio, 0)) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a350765d2d5c..31fdfba556a8 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -149,14 +149,6 @@ nvme_loop_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } -static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, - struct request *rq) -{ - if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) - return nvmf_check_init_req(&queue->ctrl->ctrl, rq); - return BLK_STS_OK; -} - static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -166,7 +158,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret; - ret = nvme_loop_is_ready(queue, req); + ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req, + test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true); if (unlikely(ret)) return ret; @@ -174,15 +167,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; + blk_mq_start_request(req); iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; iod->req.port = nvmet_loop_port; if (!nvmet_req_init(&iod->req, &queue->nvme_cq, - &queue->nvme_sq, &nvme_loop_ops)) { - nvme_cleanup_cmd(req); - blk_mq_start_request(req); - nvme_loop_queue_response(&iod->req); + &queue->nvme_sq, &nvme_loop_ops)) return BLK_STS_OK; - } if (blk_rq_payload_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; @@ -196,8 +186,6 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.transfer_len = blk_rq_payload_bytes(req); } - blk_mq_start_request(req); - schedule_work(&iod->work); return BLK_STS_OK; } diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0cf25d789d05..3f3cb72e0c0c 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -587,18 +587,28 @@ out: static unsigned int sr_block_check_events(struct gendisk *disk, unsigned int clearing) { - struct scsi_cd *cd = scsi_cd(disk); + unsigned int ret = 0; + struct scsi_cd *cd; - if (atomic_read(&cd->device->disk_events_disable_depth)) + cd = scsi_cd_get(disk); + if (!cd) return 0; - return cdrom_check_events(&cd->cdi, clearing); + if (!atomic_read(&cd->device->disk_events_disable_depth)) + ret = cdrom_check_events(&cd->cdi, clearing); + + scsi_cd_put(cd); + return ret; } static int sr_block_revalidate_disk(struct gendisk *disk) { - struct scsi_cd *cd = scsi_cd(disk); struct scsi_sense_hdr sshdr; + struct scsi_cd *cd; + + cd = scsi_cd_get(disk); + if (!cd) + return -ENXIO; /* if the unit is not ready, nothing more to do */ if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr)) @@ -607,6 +617,7 @@ static int sr_block_revalidate_disk(struct gendisk *disk) sr_cd_check(&cd->cdi); get_sectorsize(cd); out: + scsi_cd_put(cd); return 0; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 09da0f124699..f6be4b0b6c18 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,6 +28,7 @@ void bdi_put(struct backing_dev_info *bdi); __printf(2, 3) int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); +__printf(2, 0) int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8efcf49796a3..e3986f4b3461 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -183,7 +183,6 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, - BLK_MQ_S_START_ON_RUN = 3, BLK_MQ_MAX_DEPTH = 10240, @@ -270,7 +269,6 @@ void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); -void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); |