Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--   drivers/nvme/host/core.c    | 157
-rw-r--r--   drivers/nvme/host/fabrics.c | 100
-rw-r--r--   drivers/nvme/host/fabrics.h |   4
-rw-r--r--   drivers/nvme/host/fc.c      |  15
-rw-r--r--   drivers/nvme/host/nvme.h    |   9
-rw-r--r--   drivers/nvme/host/pci.c     | 258
-rw-r--r--   drivers/nvme/host/rdma.c    |  12
-rw-r--r--   drivers/nvme/host/trace.h   |   4
8 files changed, 313 insertions, 246 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b9ca782fe82d..04a20da76786 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -101,6 +101,15 @@ static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); static void nvme_put_subsystem(struct nvme_subsystem *subsys); +static void nvme_queue_scan(struct nvme_ctrl *ctrl) +{ + /* + * Only new queue scan work when admin and IO queues are both alive + */ + if (ctrl->state == NVME_CTRL_LIVE) + queue_work(nvme_wq, &ctrl->scan_work); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -244,9 +253,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_cancel_request(struct request *req, void *data, bool reserved) { - if (!blk_mq_request_started(req)) - return; - dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); @@ -1033,6 +1039,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) } EXPORT_SYMBOL_GPL(nvme_set_queue_count); +#define NVME_AEN_SUPPORTED \ + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT) + +static void nvme_enable_aen(struct nvme_ctrl *ctrl) +{ + u32 result; + int status; + + status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, + ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); + if (status) + dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", + ctrl->oaes & NVME_AEN_SUPPORTED); +} + static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -1351,13 +1372,19 @@ static void nvme_set_chunk_size(struct nvme_ns *ns) blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } -static void nvme_config_discard(struct nvme_ctrl *ctrl, - unsigned stream_alignment, struct request_queue *queue) +static void nvme_config_discard(struct nvme_ns *ns) { + struct nvme_ctrl *ctrl = ns->ctrl; + struct request_queue *queue = ns->queue; u32 size = queue_logical_block_size(queue); - if (stream_alignment) - size *= stream_alignment; + if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + return; + } + + if (ctrl->nr_streams && ns->sws && ns->sgs) + size *= ns->sws * ns->sgs; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); @@ -1365,9 +1392,12 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; + /* If discard is already enabled, don't reset queue limits */ + if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + return; + blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -1411,10 +1441,6 @@ static void nvme_update_disk_info(struct gendisk *disk, { sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; - unsigned stream_alignment = 0; - - if (ns->ctrl->nr_streams && ns->sws && ns->sgs) - stream_alignment = ns->sws * ns->sgs; blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); @@ -1428,10 +1454,9 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_init_integrity(disk, ns->ms, ns->pi_type); if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) capacity = 0; - set_capacity(disk, capacity); - if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) - 
nvme_config_discard(ns->ctrl, stream_alignment, disk->queue); + set_capacity(disk, capacity); + nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); } @@ -1577,7 +1602,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1589,7 +1614,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } @@ -2183,7 +2208,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { + if (!ctrl->opts->discovery_nqn && + nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); @@ -2314,7 +2340,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) { ret = nvme_get_effects_log(ctrl); if (ret < 0) - return ret; + goto out_free; } if (!ctrl->identified) { @@ -2345,6 +2371,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oacs = le16_to_cpu(id->oacs); ctrl->oncs = le16_to_cpup(&id->oncs); + ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; ctrl->cntlid = le16_to_cpup(&id->cntlid); @@ -3170,6 +3197,42 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) nvme_remove_invalid_namespaces(ctrl, nn); } +static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) +{ + size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); + __le32 *log; + int error, i; + bool ret = false; + + log = kzalloc(log_size, GFP_KERNEL); + if (!log) + return false; + + error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); + if (error) { + dev_warn(ctrl->device, + "reading changed ns log failed: %d\n", error); + goto out_free_log; + } + + if (log[0] == cpu_to_le32(0xffffffff)) + goto out_free_log; + + for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) { + u32 nsid = le32_to_cpu(log[i]); + + if (nsid == 0) + break; + dev_info(ctrl->device, "rescanning namespace %d.\n", nsid); + nvme_validate_ns(ctrl, nsid); + } + ret = true; + +out_free_log: + kfree(log); + return ret; +} + static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = @@ -3182,6 +3245,12 @@ static void nvme_scan_work(struct work_struct *work) WARN_ON_ONCE(!ctrl->tagset); + if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) { + if (nvme_scan_changed_ns_log(ctrl)) + goto out_sort_namespaces; + dev_info(ctrl->device, "rescanning namespaces.\n"); + } + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3189,25 +3258,16 @@ static void nvme_scan_work(struct work_struct *work) if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { if (!nvme_scan_ns_list(ctrl, nn)) - goto done; + goto out_free_id; } nvme_scan_ns_sequential(ctrl, nn); - done: +out_free_id: + kfree(id); +out_sort_namespaces: down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, 
ns_cmp); up_write(&ctrl->namespaces_rwsem); - kfree(id); -} - -void nvme_queue_scan(struct nvme_ctrl *ctrl) -{ - /* - * Only new queue scan work when admin and IO queues are both alive - */ - if (ctrl->state == NVME_CTRL_LIVE) - queue_work(nvme_wq, &ctrl->scan_work); } -EXPORT_SYMBOL_GPL(nvme_queue_scan); /* * This function iterates the namespace list unlocked to allow recovery from @@ -3322,8 +3382,23 @@ static void nvme_fw_act_work(struct work_struct *work) nvme_get_fw_slot_info(ctrl); } +static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) +{ + switch ((result & 0xff00) >> 8) { + case NVME_AER_NOTICE_NS_CHANGED: + set_bit(EVENT_NS_CHANGED, &ctrl->events); + nvme_queue_scan(ctrl); + break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + queue_work(nvme_wq, &ctrl->fw_act_work); + break; + default: + dev_warn(ctrl->device, "async event result %08x\n", result); + } +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res) + volatile union nvme_result *res) { u32 result = le32_to_cpu(res->u32); @@ -3331,6 +3406,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, return; switch (result & 0x7) { + case NVME_AER_NOTICE: + nvme_handle_aen_notice(ctrl, result); + break; case NVME_AER_ERROR: case NVME_AER_SMART: case NVME_AER_CSS: @@ -3340,18 +3418,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, default: break; } - - switch (result & 0xff07) { - case NVME_AER_NOTICE_NS_CHANGED: - dev_info(ctrl->device, "rescanning\n"); - nvme_queue_scan(ctrl); - break; - case NVME_AER_NOTICE_FW_ACT_STARTING: - queue_work(nvme_wq, &ctrl->fw_act_work); - break; - default: - dev_warn(ctrl->device, "async event result %08x\n", result); - } queue_work(nvme_wq, &ctrl->async_event_work); } EXPORT_SYMBOL_GPL(nvme_complete_async_event); @@ -3374,6 +3440,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); + nvme_enable_aen(ctrl); queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae732a77fe8..5f5f7067c41d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) goto out_unlock; kref_init(&host->ref); - memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -545,71 +545,54 @@ blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, return BLK_STS_OK; switch (ctrl->state) { - case NVME_CTRL_DELETING: - goto reject_io; - case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: + case NVME_CTRL_DELETING: + /* + * This is the case of starting a new or deleting an association + * but connectivity was lost before it was fully created or torn + * down. We need to error the commands used to initialize the + * controller so the reconnect can go into a retry attempt. The + * commands should all be marked REQ_FAILFAST_DRIVER, which will + * hit the reject path below. Anything else will be queued while + * the state settles. + */ if (!is_connected) - /* - * This is the case of starting a new - * association but connectivity was lost - * before it was fully created. We need to - * error the commands used to initialize the - * controller so the reconnect can go into a - * retry attempt. The commands should all be - * marked REQ_FAILFAST_DRIVER, which will hit - * the reject path below. 
Anything else will - * be queued while the state settles. - */ - goto reject_or_queue_io; - - if ((queue_live && - !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) || - (!queue_live && blk_rq_is_passthrough(rq) && - cmd->common.opcode == nvme_fabrics_command && - cmd->fabrics.fctype == nvme_fabrics_type_connect)) - /* - * If queue is live, allow only commands that - * are internally generated pass through. These - * are commands on the admin queue to initialize - * the controller. This will reject any ioctl - * admin cmds received while initializing. - * - * If the queue is not live, allow only a - * connect command. This will reject any ioctl - * admin cmd as well as initialization commands - * if the controller reverted the queue to non-live. - */ + break; + + /* + * If queue is live, allow only commands that are internally + * generated pass through. These are commands on the admin + * queue to initialize the controller. This will reject any + * ioctl admin cmds received while initializing. + */ + if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) return BLK_STS_OK; /* - * fall-thru to the reject_or_queue_io clause + * If the queue is not live, allow only a connect command. This + * will reject any ioctl admin cmd as well as initialization + * commands if the controller reverted the queue to non-live. */ + if (!queue_live && blk_rq_is_passthrough(rq) && + cmd->common.opcode == nvme_fabrics_command && + cmd->fabrics.fctype == nvme_fabrics_type_connect) + return BLK_STS_OK; break; - - /* these cases fall-thru - * case NVME_CTRL_LIVE: - * case NVME_CTRL_RESETTING: - */ default: break; } -reject_or_queue_io: /* - * Any other new io is something we're not in a state to send - * to the device. Default action is to busy it and retry it - * after the controller state is recovered. However, anything - * marked for failfast or nvme multipath is immediately failed. - * Note: commands used to initialize the controller will be - * marked for failfast. + * Any other new io is something we're not in a state to send to the + * device. Default action is to busy it and retry it after the + * controller state is recovered. However, anything marked for failfast + * or nvme multipath is immediately failed. Note: commands used to + * initialize the controller will be marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast. 
*/ if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; - -reject_io: nvme_req(rq)->status = NVME_SC_ABORT_REQ; return BLK_STS_IOERR; } @@ -689,10 +672,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); - if (opts->discovery_nqn) { - opts->kato = 0; - opts->nr_io_queues = 0; - } break; case NVMF_OPT_TRADDR: p = match_strdup(args); @@ -851,6 +830,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } } + if (opts->discovery_nqn) { + opts->kato = 0; + opts->nr_io_queues = 0; + opts->duplicate_connect = true; + } if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else @@ -983,16 +967,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_module_put; } - if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { - dev_warn(ctrl->device, - "controller returned incorrect NQN: \"%s\".\n", - ctrl->subsys->subnqn); - module_put(ops->module); - up_read(&nvmf_transports_rwsem); - nvme_delete_ctrl_sync(ctrl); - return ERR_PTR(-EINVAL); - } - module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index ef46c915b7b5..0cf0460a5c92 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -139,7 +139,9 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || + if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DEAD || + strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) return false; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6cb26bcf6ec0..0bad65803271 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1686,16 +1686,6 @@ done: goto check_error; } - /* - * Force failures of commands if we're killing the controller - * or have an error on a command used to create an new association - */ - if (status && - (blk_queue_dying(rq->q) || - ctrl->ctrl.state == NVME_CTRL_NEW || - ctrl->ctrl.state == NVME_CTRL_CONNECTING)) - status |= cpu_to_le16(NVME_SC_DNR << 1); - __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); nvme_end_request(rq, status, result); @@ -2403,9 +2393,6 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - if (!blk_mq_request_started(req)) - return; - __nvme_fc_abort_op(ctrl, op); } @@ -3284,6 +3271,8 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) } spin_unlock_irqrestore(&nvme_fc_lock, flags); + pr_warn("%s: %s - %s combination not found\n", + __func__, opts->traddr, opts->host_traddr); return ERR_PTR(-ENOENT); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 17d2f7cf3fed..de24fe77c80b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -22,6 +22,7 @@ #include <linux/lightnvm.h> #include <linux/sed-opal.h> #include <linux/fault-inject.h> +#include <linux/rcupdate.h> extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -180,6 +181,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + u32 oaes; u32 aen_result; unsigned int shutdown_timeout; unsigned int kato; @@ -192,6 +194,8 @@ struct nvme_ctrl { struct delayed_work ka_work; struct 
nvme_command ka_cmd; struct work_struct fw_act_work; +#define EVENT_NS_CHANGED (1 << 0) + unsigned long events; /* Power saving configuration */ u64 ps_max_latency_us; @@ -398,14 +402,13 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); -void nvme_queue_scan(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, - union nvme_result *res); + volatile union nvme_result *res); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); @@ -454,7 +457,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - if (head && ns == srcu_dereference(head->current_path, &head->srcu)) + if (head && ns == rcu_access_pointer(head->current_path)) rcu_assign_pointer(head->current_path, NULL); } struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 17a0190bd88f..e526437bacbf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -13,6 +13,7 @@ */ #include <linux/aer.h> +#include <linux/async.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> #include <linux/blk-mq-pci.h> @@ -68,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); struct nvme_dev; struct nvme_queue; -static void nvme_process_cq(struct nvme_queue *nvmeq); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); /* @@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) struct nvme_queue { struct device *q_dmadev; struct nvme_dev *dev; - spinlock_t q_lock; + spinlock_t sq_lock; struct nvme_command *sq_cmds; struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -159,9 +160,9 @@ struct nvme_queue { s16 cq_vector; u16 sq_tail; u16 cq_head; + u16 last_cq_head; u16 qid; u8 cq_phase; - u8 cqe_seen; u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; @@ -420,28 +421,25 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) } /** - * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell + * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use * @cmd: The command to send - * - * Safe to use from interrupt context */ -static void __nvme_submit_cmd(struct nvme_queue *nvmeq, - struct nvme_command *cmd) +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) { - u16 tail = nvmeq->sq_tail; - + spin_lock(&nvmeq->sq_lock); if (nvmeq->sq_cmds_io) - memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd)); + memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd, + sizeof(*cmd)); else - memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd)); - if (++tail == nvmeq->q_depth) - tail = 0; - if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, - nvmeq->dbbuf_sq_ei)) - writel(tail, nvmeq->q_db); - nvmeq->sq_tail = tail; + if (++nvmeq->sq_tail == nvmeq->q_depth) + nvmeq->sq_tail = 0; + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, + nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) + writel(nvmeq->sq_tail, nvmeq->q_db); + spin_unlock(&nvmeq->sq_lock); } static void 
**nvme_pci_iod_list(struct request *req) @@ -872,6 +870,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_command cmnd; blk_status_t ret; + /* + * We should not need to do this, but we're still using this to + * ensure we can drain requests on a dying queue. + */ + if (unlikely(nvmeq->cq_vector < 0)) + return BLK_STS_IOERR; + ret = nvme_setup_cmd(ns, req, &cmnd); if (ret) return ret; @@ -887,16 +892,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } blk_mq_start_request(req); - - spin_lock_irq(&nvmeq->q_lock); - if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_STS_IOERR; - spin_unlock_irq(&nvmeq->q_lock); - goto out_cleanup_iod; - } - __nvme_submit_cmd(nvmeq, &cmnd); - nvme_process_cq(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + nvme_submit_cmd(nvmeq, &cmnd); return BLK_STS_OK; out_cleanup_iod: nvme_free_iod(dev, req); @@ -914,10 +910,10 @@ static void nvme_pci_complete_rq(struct request *req) } /* We read the CQE phase first to check if the rest of the entry is valid */ -static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, - u16 phase) +static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; + return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == + nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -931,9 +927,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) } } -static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { + volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { @@ -956,83 +952,87 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, return; } - nvmeq->cqe_seen = 1; req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); nvme_end_request(req, cqe->status, cqe->result); } -static inline bool nvme_read_cqe(struct nvme_queue *nvmeq, - struct nvme_completion *cqe) +static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) { - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { - *cqe = nvmeq->cqes[nvmeq->cq_head]; + while (start != end) { + nvme_handle_cqe(nvmeq, start); + if (++start == nvmeq->q_depth) + start = 0; + } +} - if (++nvmeq->cq_head == nvmeq->q_depth) { - nvmeq->cq_head = 0; - nvmeq->cq_phase = !nvmeq->cq_phase; - } - return true; +static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) +{ + if (++nvmeq->cq_head == nvmeq->q_depth) { + nvmeq->cq_head = 0; + nvmeq->cq_phase = !nvmeq->cq_phase; } - return false; } -static void nvme_process_cq(struct nvme_queue *nvmeq) +static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, + u16 *end, int tag) { - struct nvme_completion cqe; - int consumed = 0; + bool found = false; - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; + *start = nvmeq->cq_head; + while (!found && nvme_cqe_pending(nvmeq)) { + if (nvmeq->cqes[nvmeq->cq_head].command_id == tag) + found = true; + nvme_update_cq_head(nvmeq); } + *end = nvmeq->cq_head; - if (consumed) + if (*start != *end) nvme_ring_cq_doorbell(nvmeq); + return found; } static irqreturn_t nvme_irq(int irq, void *data) { - irqreturn_t result; struct nvme_queue *nvmeq = data; - spin_lock(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - result = nvmeq->cqe_seen ? 
IRQ_HANDLED : IRQ_NONE; - nvmeq->cqe_seen = 0; - spin_unlock(&nvmeq->q_lock); - return result; + irqreturn_t ret = IRQ_NONE; + u16 start, end; + + spin_lock(&nvmeq->cq_lock); + if (nvmeq->cq_head != nvmeq->last_cq_head) + ret = IRQ_HANDLED; + nvme_process_cq(nvmeq, &start, &end, -1); + nvmeq->last_cq_head = nvmeq->cq_head; + spin_unlock(&nvmeq->cq_lock); + + if (start != end) { + nvme_complete_cqes(nvmeq, start, end); + return IRQ_HANDLED; + } + + return ret; } static irqreturn_t nvme_irq_check(int irq, void *data) { struct nvme_queue *nvmeq = data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (nvme_cqe_pending(nvmeq)) return IRQ_WAKE_THREAD; return IRQ_NONE; } static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_completion cqe; - int found = 0, consumed = 0; + u16 start, end; + bool found; - if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + if (!nvme_cqe_pending(nvmeq)) return 0; - spin_lock_irq(&nvmeq->q_lock); - while (nvme_read_cqe(nvmeq, &cqe)) { - nvme_handle_cqe(nvmeq, &cqe); - consumed++; - - if (tag == cqe.command_id) { - found = 1; - break; - } - } - - if (consumed) - nvme_ring_cq_doorbell(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); + found = nvme_process_cq(nvmeq, &start, &end, tag); + spin_unlock_irq(&nvmeq->cq_lock); + nvme_complete_cqes(nvmeq, start, end); return found; } @@ -1052,10 +1052,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; c.common.command_id = NVME_AQ_BLK_MQ_DEPTH; - - spin_lock_irq(&nvmeq->q_lock); - __nvme_submit_cmd(nvmeq, &c); - spin_unlock_irq(&nvmeq->q_lock); + nvme_submit_cmd(nvmeq, &c); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -1070,7 +1067,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) } static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, - struct nvme_queue *nvmeq) + struct nvme_queue *nvmeq, s16 vector) { struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; @@ -1085,7 +1082,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); + c.create_cq.irq_vector = cpu_to_le16(vector); return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1208,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_warn_reset(dev, csts); nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* @@ -1218,24 +1215,24 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); - return BLK_EH_HANDLED; + return BLK_EH_DONE; } /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced * cancellation error. All outstanding requests are completed on - * shutdown, so we return BLK_EH_HANDLED. + * shutdown, so we return BLK_EH_DONE. 
*/ switch (dev->ctrl.state) { case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: - dev_warn(dev->ctrl.device, + dev_warn_ratelimited(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; default: break; } @@ -1252,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) nvme_dev_disable(dev, false); nvme_reset_ctrl(&dev->ctrl); - /* - * Mark the request as handled, since the inline shutdown - * forces all outstanding requests to complete. - */ nvme_req(req)->flags |= NVME_REQ_CANCELLED; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { @@ -1321,15 +1314,21 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) { int vector; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); if (nvmeq->cq_vector == -1) { - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); return 1; } vector = nvmeq->cq_vector; nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); + + /* + * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without + * having to grab the lock. + */ + mb(); if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); @@ -1342,15 +1341,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; + u16 start, end; if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - spin_unlock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); + nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock_irq(&nvmeq->cq_lock); + + nvme_complete_cqes(nvmeq, start, end); } static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, @@ -1408,7 +1410,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; - spin_lock_init(&nvmeq->q_lock); + spin_lock_init(&nvmeq->sq_lock); + spin_lock_init(&nvmeq->cq_lock); nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -1444,7 +1447,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) { struct nvme_dev *dev = nvmeq->dev; - spin_lock_irq(&nvmeq->q_lock); + spin_lock_irq(&nvmeq->cq_lock); nvmeq->sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; @@ -1452,13 +1455,14 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; - spin_unlock_irq(&nvmeq->q_lock); + spin_unlock_irq(&nvmeq->cq_lock); } static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) { struct nvme_dev *dev = nvmeq->dev; int result; + s16 vector; if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth), @@ -1471,15 +1475,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) * A queue's vector matches the queue identifier unless the controller * has only one vector available. */ - nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid; - result = adapter_alloc_cq(dev, qid, nvmeq); + vector = dev->num_vecs == 1 ? 
0 : qid; + result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result < 0) - goto release_vector; + goto out; result = adapter_alloc_sq(dev, qid, nvmeq); if (result < 0) goto release_cq; + /* + * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will + * invoke free_irq for it and cause a 'Trying to free already-free IRQ + * xxx' warning if the create CQ/SQ command times out. + */ + nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); result = queue_request_irq(nvmeq); if (result < 0) @@ -1487,13 +1497,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) return result; - release_sq: +release_sq: + nvmeq->cq_vector = -1; dev->online_queues--; adapter_delete_sq(dev, qid); - release_cq: +release_cq: adapter_delete_cq(dev, qid); - release_vector: - nvmeq->cq_vector = -1; +out: return result; } @@ -1997,19 +2007,22 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error) static void nvme_del_cq_end(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; + u16 start, end; if (!error) { unsigned long flags; /* - * We might be called with the AQ q_lock held - * and the I/O queue q_lock should always + * We might be called with the AQ cq_lock held + * and the I/O queue cq_lock should always * nest inside the AQ one. */ - spin_lock_irqsave_nested(&nvmeq->q_lock, flags, + spin_lock_irqsave_nested(&nvmeq->cq_lock, flags, SINGLE_DEPTH_NESTING); - nvme_process_cq(nvmeq); - spin_unlock_irqrestore(&nvmeq->q_lock, flags); + nvme_process_cq(nvmeq, &start, &end, -1); + spin_unlock_irqrestore(&nvmeq->cq_lock, flags); + + nvme_complete_cqes(nvmeq, start, end); } nvme_del_queue_end(req, error); @@ -2497,6 +2510,15 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) return 0; } +static void nvme_async_probe(void *data, async_cookie_t cookie) +{ + struct nvme_dev *dev = data; + + nvme_reset_ctrl_sync(&dev->ctrl); + flush_work(&dev->ctrl.scan_work); + nvme_put_ctrl(&dev->ctrl); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -2541,7 +2563,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - nvme_reset_ctrl(&dev->ctrl); + nvme_get_ctrl(&dev->ctrl); + async_schedule(nvme_async_probe, dev); return 0; @@ -2685,6 +2708,9 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) static void nvme_error_resume(struct pci_dev *pdev) { + struct nvme_dev *dev = pci_get_drvdata(pdev); + + flush_work(&dev->ctrl.reset_work); pci_cleanup_aer_uncorrect_error_status(pdev); } @@ -2714,6 +2740,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, + { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ @@ -2728,6 +2756,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ .driver_data = NVME_QUIRK_LIGHTNVM, }, + { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ + .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { 
PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1eb4438a8763..7b3f08410430 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) { dev_err(ctrl->ctrl.device, "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; + goto out_stop_queue; } ctrl->ctrl.sqsize = @@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (error) - goto out_cleanup_queue; + goto out_stop_queue; ctrl->ctrl.max_hw_sectors = (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_stop_queue; error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (error) - goto out_cleanup_queue; + goto out_stop_queue; return 0; +out_stop_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); @@ -1598,7 +1600,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) /* fail with DNR on cmd timeout */ nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - return BLK_EH_HANDLED; + return BLK_EH_DONE; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index ea91fccd1bc0..01390f0e1671 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -148,8 +148,8 @@ TRACE_EVENT(nvme_complete_rq, __entry->flags = nvme_req(req)->flags; __entry->status = nvme_req(req)->status; ), - TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u", - __entry->cid, __entry->qid, __entry->result, + TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u", + __entry->qid, __entry->cid, __entry->result, __entry->retries, __entry->flags, __entry->status) ); |
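
The pci.c changes above replace the single q_lock with separate sq_lock/cq_lock and split completion handling into two steps: nvme_process_cq() only advances cq_head under cq_lock and reports the consumed window, and nvme_complete_cqes() then completes that window with the lock dropped. The standalone sketch below is illustrative only; the struct, the field names, and the userspace scaffolding are simplifications assumed for the example, not the driver's actual types.

/*
 * Sketch of the two-step CQ handling pattern: scan under the (hypothetical)
 * cq_lock to record a [start, end) window, then complete the window with the
 * lock dropped.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define Q_DEPTH 8

struct sketch_queue {
	uint16_t cq_head;
	uint8_t  cq_phase;
	uint8_t  phase_bits[Q_DEPTH];	/* stands in for the CQE phase bit */
	uint16_t q_depth;
};

static bool cqe_pending(struct sketch_queue *q)
{
	return (q->phase_bits[q->cq_head] & 1) == q->cq_phase;
}

/* Step 1: would run under cq_lock; only advances cq_head and records the window. */
static void process_cq(struct sketch_queue *q, uint16_t *start, uint16_t *end)
{
	*start = q->cq_head;
	while (cqe_pending(q)) {
		if (++q->cq_head == q->q_depth) {
			q->cq_head = 0;
			q->cq_phase = !q->cq_phase;
		}
	}
	*end = q->cq_head;
	/* the real driver rings the CQ doorbell here if *start != *end */
}

/* Step 2: runs outside the lock; completes every entry in [start, end). */
static void complete_cqes(struct sketch_queue *q, uint16_t start, uint16_t end)
{
	while (start != end) {
		printf("complete CQE at index %u\n", start);
		if (++start == q->q_depth)
			start = 0;
	}
}

int main(void)
{
	struct sketch_queue q = { .cq_phase = 1, .q_depth = Q_DEPTH };
	uint16_t start, end;

	q.phase_bits[0] = 1;	/* pretend two completions arrived */
	q.phase_bits[1] = 1;

	process_cq(&q, &start, &end);	/* scan phase */
	complete_cqes(&q, start, end);	/* completion phase, lock dropped */
	return 0;
}

In the patch the same window-based completion is reused by the interrupt handler, the poll path (which additionally watches for a specific tag), and queue teardown, which is why the scan and the completion are kept as separate helpers rather than folded into one locked loop.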