Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 132 |
1 file changed, 112 insertions, 20 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eeb409c287b8..d5e0906262ea 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
-unsigned int nvme_max_retries = 5;
-module_param_named(max_retries, nvme_max_retries, uint, 0644);
+static u8 nvme_max_retries = 5;
+module_param_named(max_retries, nvme_max_retries, byte, 0644);
 MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-EXPORT_SYMBOL_GPL(nvme_max_retries);
 
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
@@ -62,11 +61,66 @@ module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
 
+static bool force_apst;
+module_param(force_apst, bool, 0644);
+MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
+static int nvme_error_status(struct request *req)
+{
+	switch (nvme_req(req)->status & 0x7ff) {
+	case NVME_SC_SUCCESS:
+		return 0;
+	case NVME_SC_CAP_EXCEEDED:
+		return -ENOSPC;
+	default:
+		return -EIO;
+
+	/*
+	 * XXX: these errors are a nasty side-band protocol to
+	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
+	 * anywhere..
+	 */
+	case NVME_SC_CMD_SEQ_ERROR:
+		return -EILSEQ;
+	case NVME_SC_ONCS_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case NVME_SC_WRITE_FAULT:
+	case NVME_SC_READ_ERROR:
+	case NVME_SC_UNWRITTEN_BLOCK:
+		return -ENODATA;
+	}
+}
+
+static inline bool nvme_req_needs_retry(struct request *req)
+{
+	if (blk_noretry_request(req))
+		return false;
+	if (nvme_req(req)->status & NVME_SC_DNR)
+		return false;
+	if (jiffies - req->start_time >= req->timeout)
+		return false;
+	if (nvme_req(req)->retries >= nvme_max_retries)
+		return false;
+	return true;
+}
+
+void nvme_complete_rq(struct request *req)
+{
+	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
+		nvme_req(req)->retries++;
+		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		return;
+	}
+
+	blk_mq_end_request(req, nvme_error_status(req));
+}
+EXPORT_SYMBOL_GPL(nvme_complete_rq);
+
 void nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
 	int status;
@@ -80,7 +134,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved)
 	status = NVME_SC_ABORT_REQ;
 	if (blk_queue_dying(req->q))
 		status |= NVME_SC_DNR;
-	blk_mq_complete_request(req, status);
+	nvme_req(req)->status = status;
+	blk_mq_complete_request(req);
+
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
 
@@ -205,12 +261,6 @@ fail:
 	return NULL;
 }
 
-void nvme_requeue_req(struct request *req)
-{
-	blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
-}
-EXPORT_SYMBOL_GPL(nvme_requeue_req);
-
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid)
 {
@@ -327,6 +377,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
+	if (!(req->rq_flags & RQF_DONTPREP)) {
+		nvme_req(req)->retries = 0;
+		nvme_req(req)->flags = 0;
+		req->rq_flags |= RQF_DONTPREP;
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
 	case REQ_OP_DRV_OUT:
@@ -335,6 +391,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	case REQ_OP_FLUSH:
 		nvme_setup_flush(ns, cmd);
 		break;
+	case REQ_OP_WRITE_ZEROES:
+		/* currently only aliased to deallocate for a few ctrls: */
 	case REQ_OP_DISCARD:
 		ret = nvme_setup_discard(ns, req, cmd);
 		break;
@@ -378,7 +436,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	blk_execute_rq(req->q, NULL, req, at_head);
 	if (result)
 		*result = nvme_req(req)->result;
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
  out:
 	blk_mq_free_request(req);
 	return ret;
@@ -463,7 +524,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 	}
  submit:
 	blk_execute_rq(req->q, disk, req, 0);
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
 	if (result)
 		*result = le32_to_cpu(nvme_req(req)->result.u32);
 	if (meta && !ret && !write) {
@@ -900,16 +964,14 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
-		ns->queue->limits.discard_zeroes_data = 1;
-	else
-		ns->queue->limits.discard_zeroes_data = 0;
-
 	ns->queue->limits.discard_alignment = logical_block_size;
 	ns->queue->limits.discard_granularity = logical_block_size;
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+
+	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+		blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
 }
 
 static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
@@ -1267,7 +1329,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
	 * heuristic: we are willing to spend at most 2% of the time
	 * transitioning between power states. Therefore, when running
	 * in any given state, we will enter the next lower-power
-	 * non-operational state after waiting 100 * (enlat + exlat)
+	 * non-operational state after waiting 50 * (enlat + exlat)
	 * microseconds, as long as that state's total latency is under
	 * the requested maximum latency.
	 *
@@ -1278,6 +1340,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 
 	unsigned apste;
 	struct nvme_feat_auto_pst *table;
+	u64 max_lat_us = 0;
+	int max_ps = -1;
 	int ret;
 
 	/*
@@ -1299,6 +1363,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	if (ctrl->ps_max_latency_us == 0) {
 		/* Turn off APST. */
 		apste = 0;
+		dev_dbg(ctrl->device, "APST disabled\n");
 	} else {
 		__le64 target = cpu_to_le64(0);
 		int state;
@@ -1348,9 +1413,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 
 			target = cpu_to_le64((state << 3) |
 					     (transition_ms << 8));
+
+			if (max_ps == -1)
+				max_ps = state;
+
+			if (total_latency_us > max_lat_us)
+				max_lat_us = total_latency_us;
 		}
 
 		apste = 1;
+
+		if (max_ps == -1) {
+			dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n");
+		} else {
+			dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n",
+				max_ps, max_lat_us, (int)sizeof(*table), table);
+		}
 	}
 
 	ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
@@ -1488,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		}
 	}
 
+	if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
+		dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
+		ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
+	}
+
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->vid = le16_to_cpu(id->vid);
 	ctrl->oncs = le16_to_cpup(&id->oncs);
@@ -1510,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->npss = id->npss;
 	prev_apsta = ctrl->apsta;
-	ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+	if (ctrl->quirks & NVME_QUIRK_NO_APST) {
+		if (force_apst && id->apsta) {
+			dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
+			ctrl->apsta = 1;
+		} else {
+			ctrl->apsta = 0;
+		}
+	} else {
+		ctrl->apsta = id->apsta;
+	}
 	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
 
 	if (ctrl->ops->is_fabrics) {
@@ -2393,7 +2485,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_freeze_queue_start(ns->queue);
+		blk_freeze_queue_start(ns->queue);
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);