diff options
Diffstat (limited to 'drivers/nvme')
31 files changed, 762 insertions, 541 deletions
diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a23ab5c968b9..a3455f1d67fa 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -471,4 +471,5 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key) } EXPORT_SYMBOL_GPL(nvme_auth_generate_key); +MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index a5c0431c101c..6f7e7a8fa5ae 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -181,5 +181,6 @@ static void __exit nvme_keyring_exit(void) MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); +MODULE_DESCRIPTION("NVMe Keyring implementation"); module_init(nvme_keyring_init); module_exit(nvme_keyring_exit); diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 596bb11eeba5..a480cdeac288 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -797,6 +797,7 @@ static int apple_nvme_init_request(struct blk_mq_tag_set *set, static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) { + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); bool dead = false, freeze = false; unsigned long flags; @@ -808,8 +809,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) if (csts & NVME_CSTS_CFS) dead = true; - if (anv->ctrl.state == NVME_CTRL_LIVE || - anv->ctrl.state == NVME_CTRL_RESETTING) { + if (state == NVME_CTRL_LIVE || + state == NVME_CTRL_RESETTING) { freeze = true; nvme_start_freeze(&anv->ctrl); } @@ -881,7 +882,7 @@ static enum blk_eh_timer_return apple_nvme_timeout(struct request *req) unsigned long flags; u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); - if (anv->ctrl.state != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(&anv->ctrl) != NVME_CTRL_LIVE) { /* * From rdma.c: * If we are resetting, connecting or deleting we should @@ -985,10 +986,10 @@ static void apple_nvme_reset_work(struct work_struct *work) u32 boot_status, aqa; struct apple_nvme *anv = container_of(work, struct apple_nvme, ctrl.reset_work); + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); - if (anv->ctrl.state != NVME_CTRL_RESETTING) { - dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", - anv->ctrl.state); + if (state != NVME_CTRL_RESETTING) { + dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", state); ret = -ENODEV; goto out; } @@ -1515,7 +1516,7 @@ static int apple_nvme_probe(struct platform_device *pdev) goto put_dev; } - anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset); + anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL); if (IS_ERR(anv->ctrl.admin_q)) { ret = -ENOMEM; goto put_dev; diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 72c0525c75f5..a264b3ae078b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -48,11 +48,6 @@ struct nvme_dhchap_queue_context { static struct workqueue_struct *nvme_auth_wq; -#define nvme_auth_flags_from_qid(qid) \ - (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED -#define nvme_auth_queue_from_qid(ctrl, qid) \ - (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q - static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl) { return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues + @@ -63,10 +58,15 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - blk_mq_req_flags_t flags = nvme_auth_flags_from_qid(qid); - struct request_queue *q = nvme_auth_queue_from_qid(ctrl, qid); + nvme_submit_flags_t flags = NVME_SUBMIT_RETRY; + struct request_queue *q = ctrl->fabrics_q; int ret; + if (qid != 0) { + flags |= NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED; + q = ctrl->connect_q; + } + cmd.auth_common.opcode = nvme_fabrics_command; cmd.auth_common.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER; cmd.auth_common.spsp0 = 0x01; @@ -80,8 +80,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, } ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len, - qid == 0 ? NVME_QID_ANY : qid, - 0, flags); + qid == 0 ? NVME_QID_ANY : qid, flags); if (ret > 0) dev_warn(ctrl->device, "qid %d auth_send failed with status %d\n", qid, ret); @@ -897,7 +896,7 @@ static void nvme_ctrl_auth_work(struct work_struct *work) * If the ctrl is no connected, bail as reconnect will handle * authentication. */ - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; /* Authenticate admin queue first */ diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 20f46c230885..6f2ebb5fcdb0 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -171,15 +171,15 @@ static const char * const nvme_statuses[] = { [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; -const unsigned char *nvme_get_error_status_str(u16 status) +const char *nvme_get_error_status_str(u16 status) { status &= 0x7ff; if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) - return nvme_statuses[status & 0x7ff]; + return nvme_statuses[status]; return "Unknown"; } -const unsigned char *nvme_get_opcode_str(u8 opcode) +const char *nvme_get_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode]) return nvme_ops[opcode]; @@ -187,7 +187,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_opcode_str); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +const char *nvme_get_admin_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode]) return nvme_admin_ops[opcode]; @@ -195,7 +195,7 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) { +const char *nvme_get_fabrics_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode]) return nvme_fabrics_ops[opcode]; return "Unknown"; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 85ab0fcf9e88..00864a634470 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock); static DEFINE_IDA(nvme_instance_ida); static dev_t nvme_ctrl_base_chr_devt; -static struct class *nvme_class; -static struct class *nvme_subsys_class; +static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env); +static const struct class nvme_class = { + .name = "nvme", + .dev_uevent = nvme_class_uevent, +}; + +static const struct class nvme_subsys_class = { + .name = "nvme-subsystem", +}; static DEFINE_IDA(nvme_ns_chr_minor_ida); static dev_t nvme_ns_chr_devt; -static struct class *nvme_ns_chr_class; +static const struct class nvme_ns_chr_class = { + .name = "nvme-generic", +}; static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, @@ -338,6 +347,30 @@ static void nvme_log_error(struct request *req) nr->status & NVME_SC_DNR ? "DNR " : ""); } +static void nvme_log_err_passthru(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s" + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n", + ns ? ns->disk->disk_name : dev_name(nr->ctrl->device), + ns ? nvme_get_opcode_str(nr->cmd->common.opcode) : + nvme_get_admin_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : "", + nr->cmd->common.cdw10, + nr->cmd->common.cdw11, + nr->cmd->common.cdw12, + nr->cmd->common.cdw13, + nr->cmd->common.cdw14, + nr->cmd->common.cdw14); +} + enum nvme_disposition { COMPLETE, RETRY, @@ -385,8 +418,12 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) - nvme_log_error(req); + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + if (blk_rq_is_passthrough(req)) + nvme_log_err_passthru(req); + else + nvme_log_error(req); + } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); if (req->cmd_flags & REQ_NVME_MPATH) @@ -679,10 +716,21 @@ static inline void nvme_clear_nvme_request(struct request *req) /* initialize a passthrough request */ void nvme_init_request(struct request *req, struct nvme_command *cmd) { - if (req->q->queuedata) + struct nvme_request *nr = nvme_req(req); + bool logging_enabled; + + if (req->q->queuedata) { + struct nvme_ns *ns = req->q->disk->private_data; + + logging_enabled = ns->head->passthru_err_log_enabled; req->timeout = NVME_IO_TIMEOUT; - else /* no queuedata implies admin queue */ + } else { /* no queuedata implies admin queue */ + logging_enabled = nr->ctrl->passthru_err_log_enabled; req->timeout = NVME_ADMIN_TIMEOUT; + } + + if (!logging_enabled) + req->rq_flags |= RQF_QUIET; /* passthru commands should let the driver set the SGL flags */ cmd->common.flags &= ~NVME_CMD_SGL_ALL; @@ -691,8 +739,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) if (req->mq_hctx->type == HCTX_TYPE_POLL) req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); - req->rq_flags |= RQF_QUIET; - memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); + memcpy(nr->cmd, cmd, sizeof(*cmd)); } EXPORT_SYMBOL_GPL(nvme_init_request); @@ -721,7 +768,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live) + bool queue_live, enum nvme_ctrl_state state) { struct nvme_request *req = nvme_req(rq); @@ -742,7 +789,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, * command, which is require to set the queue live in the * appropinquate states. */ - switch (nvme_ctrl_state(ctrl)) { + switch (state) { case NVME_CTRL_CONNECTING: if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && (req->cmd->fabrics.fctype == nvme_fabrics_type_connect || @@ -1051,20 +1098,27 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU); */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, blk_mq_req_flags_t flags) + int qid, nvme_submit_flags_t flags) { struct request *req; int ret; + blk_mq_req_flags_t blk_flags = 0; + if (flags & NVME_SUBMIT_NOWAIT) + blk_flags |= BLK_MQ_REQ_NOWAIT; + if (flags & NVME_SUBMIT_RESERVED) + blk_flags |= BLK_MQ_REQ_RESERVED; if (qid == NVME_QID_ANY) - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), blk_flags); else - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), blk_flags, qid - 1); if (IS_ERR(req)) return PTR_ERR(req); nvme_init_request(req, cmd); + if (flags & NVME_SUBMIT_RETRY) + req->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -1072,7 +1126,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - ret = nvme_execute_rq(req, at_head); + ret = nvme_execute_rq(req, flags & NVME_SUBMIT_AT_HEAD); if (result && ret >= 0) *result = nvme_req(req)->result; out: @@ -1085,7 +1139,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -1108,6 +1162,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { effects = le32_to_cpu(ctrl->effects->acs[opcode]); + + /* Ignore execution restrictions if any relaxation bits are set */ + if (effects & NVME_CMD_EFFECTS_CSER_MASK) + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } return effects; @@ -1349,8 +1407,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, sizeof(struct nvme_id_ctrl)); - if (error) + if (error) { kfree(*id); + *id = NULL; + } return error; } @@ -1479,6 +1539,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); kfree(*id); + *id = NULL; } return error; } @@ -1560,7 +1621,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, NVME_QID_ANY, 0, 0); + buffer, buflen, NVME_QID_ANY, 0); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -1678,12 +1739,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -#ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, - struct nvme_ns_head *head, u32 max_integrity_segments) +static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) { struct blk_integrity integrity = { }; + blk_integrity_unregister(disk); + + if (!head->ms) + return true; + + /* + * PI can always be supported as we can ask the controller to simply + * insert/strip it, which is not possible for other kinds of metadata. + */ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) || + !(head->features & NVME_NS_METADATA_SUPPORTED)) + return nvme_ns_has_pi(head); + switch (head->pi_type) { case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { @@ -1726,53 +1798,32 @@ static void nvme_init_integrity(struct gendisk *disk, } integrity.tuple_size = head->ms; + integrity.pi_offset = head->pi_offset; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); -} -#else -static void nvme_init_integrity(struct gendisk *disk, - struct nvme_ns_head *head, u32 max_integrity_segments) -{ + return true; } -#endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, - struct nvme_ns_head *head) +static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim) { - struct request_queue *queue = disk->queue; - u32 max_discard_sectors; - - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) { - max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl); - } else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { - max_discard_sectors = UINT_MAX; - } else { - blk_queue_max_discard_sectors(queue, 0); - return; - } + struct nvme_ctrl *ctrl = ns->ctrl; BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - /* - * If discard is already enabled, don't reset queue limits. - * - * This works around the fact that the block layer can't cope well with - * updating the hardware limits when overridden through sysfs. This is - * harmless because discard limits in NVMe are purely advisory. - */ - if (queue->limits.max_discard_sectors) - return; + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX)) + lim->max_hw_discard_sectors = + nvme_lba_to_sect(ns->head, ctrl->dmrsl); + else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) + lim->max_hw_discard_sectors = UINT_MAX; + else + lim->max_hw_discard_sectors = 0; + + lim->discard_granularity = lim->logical_block_size; - blk_queue_max_discard_sectors(queue, max_discard_sectors); if (ctrl->dmrl) - blk_queue_max_discard_segments(queue, ctrl->dmrl); + lim->max_discard_segments = ctrl->dmrl; else - blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - queue->limits.discard_granularity = queue_logical_block_size(queue); - - if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) - blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); + lim->max_discard_segments = NVME_DSM_MAX_RANGES; } static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) @@ -1783,42 +1834,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) a->csi == b->csi; } -static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, - struct nvme_id_ns *id) +static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid, + struct nvme_id_ns_nvm **nvmp) { - bool first = id->dps & NVME_NS_DPS_PI_FIRST; - unsigned lbaf = nvme_lbaf_index(id->flbas); - struct nvme_command c = { }; + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_CS_NS, + .identify.csi = NVME_CSI_NVM, + }; struct nvme_id_ns_nvm *nvm; - int ret = 0; - u32 elbaf; - - head->pi_size = 0; - head->ms = le16_to_cpu(id->lbaf[lbaf].ms); - if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { - head->pi_size = sizeof(struct t10_pi_tuple); - head->guard_type = NVME_NVM_NS_16B_GUARD; - goto set_pi; - } + int ret; nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); if (!nvm) return -ENOMEM; - c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(head->ns_id); - c.identify.cns = NVME_ID_CNS_CS_NS; - c.identify.csi = NVME_CSI_NVM; - ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); if (ret) - goto free_data; + kfree(nvm); + else + *nvmp = nvm; + return ret; +} - elbaf = le32_to_cpu(nvm->elbaf[lbaf]); +static void nvme_configure_pi_elbas(struct nvme_ns_head *head, + struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm) +{ + u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]); /* no support for storage tag formats right now */ if (nvme_elbaf_sts(elbaf)) - goto free_data; + return; head->guard_type = nvme_elbaf_guard_type(elbaf); switch (head->guard_type) { @@ -1831,30 +1878,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, default: break; } - -free_data: - kfree(nvm); -set_pi: - if (head->pi_size && (first || head->ms == head->pi_size)) - head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; - else - head->pi_type = 0; - - return ret; } -static int nvme_configure_metadata(struct nvme_ctrl *ctrl, - struct nvme_ns_head *head, struct nvme_id_ns *id) +static void nvme_configure_metadata(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head, struct nvme_id_ns *id, + struct nvme_id_ns_nvm *nvm) { - int ret; - - ret = nvme_init_ms(ctrl, head, id); - if (ret) - return ret; - head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + head->pi_type = 0; + head->pi_size = 0; + head->pi_offset = 0; + head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - return 0; + return; + + if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { + nvme_configure_pi_elbas(head, id, nvm); + } else { + head->pi_size = sizeof(struct t10_pi_tuple); + head->guard_type = NVME_NVM_NS_16B_GUARD; + } + + if (head->pi_size && head->ms >= head->pi_size) + head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + if (!(id->dps & NVME_NS_DPS_PI_FIRST)) + head->pi_offset = head->ms - head->pi_size; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -1863,7 +1911,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, * remap the separate metadata buffer from the block layer. */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) - return 0; + return; head->features |= NVME_NS_EXT_LBAS; @@ -1890,33 +1938,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, else head->features |= NVME_NS_METADATA_SUPPORTED; } - return 0; } -static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, - struct request_queue *q) +static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) { - bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT; - - if (ctrl->max_hw_sectors) { - u32 max_segments = - (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1; + return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1; +} - max_segments = min_not_zero(max_segments, ctrl->max_segments); - blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); - blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); - } - blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 3); - blk_queue_write_cache(q, vwc, vwc); +static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl, + struct queue_limits *lim) +{ + lim->max_hw_sectors = ctrl->max_hw_sectors; + lim->max_segments = min_t(u32, USHRT_MAX, + min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments)); + lim->max_integrity_segments = ctrl->max_integrity_segments; + lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1; + lim->max_segment_size = UINT_MAX; + lim->dma_alignment = 3; } -static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, - struct nvme_ns_head *head, struct nvme_id_ns *id) +static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, + struct queue_limits *lim) { - sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); + struct nvme_ns_head *head = ns->head; u32 bs = 1U << head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; + bool valid = true; /* * The block layer can't support LBA sizes larger than the page size @@ -1924,12 +1971,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, * allow block I/O. */ if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) { - capacity = 0; bs = (1 << 9); + valid = false; } - blk_integrity_unregister(disk); - atomic_bs = phys_bs = bs; if (id->nabo == 0) { /* @@ -1940,7 +1985,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { @@ -1950,36 +1995,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, io_opt = bs * (1 + le16_to_cpu(id->nows)); } - blk_queue_logical_block_size(disk->queue, bs); /* * Linux filesystems assume writing a single physical block is * an atomic operation. Hence limit the physical block size to the * value of the Atomic Write Unit Power Fail parameter. */ - blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs)); - blk_queue_io_min(disk->queue, phys_bs); - blk_queue_io_opt(disk->queue, io_opt); - - /* - * Register a metadata profile for PI, or the plain non-integrity NVMe - * metadata masquerading as Type 0 if supported, otherwise reject block - * I/O to namespaces with metadata except when the namespace supports - * PI, as it can strip/insert in that case. - */ - if (head->ms) { - if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - (head->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, head, - ctrl->max_integrity_segments); - else if (!nvme_ns_has_pi(head)) - capacity = 0; - } - - set_capacity_and_notify(disk, capacity); - - nvme_config_discard(ctrl, disk, head); - blk_queue_max_write_zeroes_sectors(disk->queue, - ctrl->max_zeroes_sectors); + lim->logical_block_size = bs; + lim->physical_block_size = min(phys_bs, atomic_bs); + lim->io_min = phys_bs; + lim->io_opt = io_opt; + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + lim->max_write_zeroes_sectors = UINT_MAX; + else + lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors; + return valid; } static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) @@ -1993,7 +2022,8 @@ static inline bool nvme_first_scan(struct gendisk *disk) return !disk_live(disk); } -static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id, + struct queue_limits *lim) { struct nvme_ctrl *ctrl = ns->ctrl; u32 iob; @@ -2021,38 +2051,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) return; } - blk_queue_chunk_sectors(ns->queue, iob); + lim->chunk_sectors = iob; } static int nvme_update_ns_info_generic(struct nvme_ns *ns, struct nvme_ns_info *info) { + struct queue_limits lim; + int ret; + blk_mq_freeze_queue(ns->disk->queue); - nvme_set_queue_limits(ns->ctrl, ns->queue); + lim = queue_limits_start_update(ns->disk->queue); + nvme_set_ctrl_limits(ns->ctrl, &lim); + ret = queue_limits_commit_update(ns->disk->queue, &lim); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); blk_mq_unfreeze_queue(ns->disk->queue); - if (nvme_ns_head_multipath(ns->head)) { - blk_mq_freeze_queue(ns->head->disk->queue); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - blk_stack_limits(&ns->head->disk->queue->limits, - &ns->queue->limits, 0); - ns->head->disk->flags |= GENHD_FL_HIDDEN; - blk_mq_unfreeze_queue(ns->head->disk->queue); - } - /* Hide the block-interface for these devices */ - ns->disk->flags |= GENHD_FL_HIDDEN; - set_bit(NVME_NS_READY, &ns->flags); - - return 0; + if (!ret) + ret = -ENODEV; + return ret; } static int nvme_update_ns_info_block(struct nvme_ns *ns, struct nvme_ns_info *info) { + bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT; + struct queue_limits lim; + struct nvme_id_ns_nvm *nvm = NULL; struct nvme_id_ns *id; + sector_t capacity; unsigned lbaf; int ret; @@ -2064,30 +2092,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, /* namespace not allocated or attached */ info->is_removed = true; ret = -ENODEV; - goto error; + goto out; + } + + if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) { + ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm); + if (ret < 0) + goto out; } blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); - nvme_set_queue_limits(ns->ctrl, ns->queue); + capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); - ret = nvme_configure_metadata(ns->ctrl, ns->head, id); - if (ret < 0) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } - nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); - - if (ns->head->ids.csi == NVME_CSI_ZNS) { - ret = nvme_update_zone_info(ns, lbaf); + lim = queue_limits_start_update(ns->disk->queue); + nvme_set_ctrl_limits(ns->ctrl, &lim); + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); + nvme_set_chunk_sectors(ns, id, &lim); + if (!nvme_update_disk_info(ns, id, &lim)) + capacity = 0; + nvme_config_discard(ns, &lim); + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + ns->head->ids.csi == NVME_CSI_ZNS) { + ret = nvme_update_zone_info(ns, lbaf, &lim); if (ret) { blk_mq_unfreeze_queue(ns->disk->queue); goto out; } } + ret = queue_limits_commit_update(ns->disk->queue, &lim); + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } + + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. + */ + if (!nvme_init_integrity(ns->disk, ns->head)) + capacity = 0; + + set_capacity_and_notify(ns->disk, capacity); /* * Only set the DEAC bit if the device guarantees that reads from @@ -2098,62 +2148,81 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); + blk_queue_write_cache(ns->disk->queue, vwc, vwc); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); if (blk_queue_is_zoned(ns->queue)) { - ret = nvme_revalidate_zones(ns); + ret = blk_revalidate_disk_zones(ns->disk, NULL); if (ret && !nvme_first_scan(ns->disk)) goto out; } - if (nvme_ns_head_multipath(ns->head)) { - blk_mq_freeze_queue(ns->head->disk->queue); - nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - blk_stack_limits(&ns->head->disk->queue->limits, - &ns->queue->limits, 0); - disk_update_readahead(ns->head->disk); - blk_mq_unfreeze_queue(ns->head->disk->queue); - } - ret = 0; out: - /* - * If probing fails due an unsupported feature, hide the block device, - * but still allow other access. - */ - if (ret == -ENODEV) { - ns->disk->flags |= GENHD_FL_HIDDEN; - set_bit(NVME_NS_READY, &ns->flags); - ret = 0; - } - -error: + kfree(nvm); kfree(id); return ret; } static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) { + bool unsupported = false; + int ret; + switch (info->ids.csi) { case NVME_CSI_ZNS: if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { dev_info(ns->ctrl->device, "block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n", info->nsid); - return nvme_update_ns_info_generic(ns, info); + ret = nvme_update_ns_info_generic(ns, info); + break; } - return nvme_update_ns_info_block(ns, info); + ret = nvme_update_ns_info_block(ns, info); + break; case NVME_CSI_NVM: - return nvme_update_ns_info_block(ns, info); + ret = nvme_update_ns_info_block(ns, info); + break; default: dev_info(ns->ctrl->device, "block device for nsid %u not supported (csi %u)\n", info->nsid, info->ids.csi); - return nvme_update_ns_info_generic(ns, info); + ret = nvme_update_ns_info_generic(ns, info); + break; + } + + /* + * If probing fails due an unsupported feature, hide the block device, + * but still allow other access. + */ + if (ret == -ENODEV) { + ns->disk->flags |= GENHD_FL_HIDDEN; + set_bit(NVME_NS_READY, &ns->flags); + unsupported = true; + ret = 0; + } + + if (!ret && nvme_ns_head_multipath(ns->head)) { + struct queue_limits lim; + + blk_mq_freeze_queue(ns->head->disk->queue); + if (unsupported) + ns->head->disk->flags |= GENHD_FL_HIDDEN; + else + nvme_init_integrity(ns->head->disk, ns->head); + set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); + set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); + nvme_mpath_revalidate_paths(ns); + + lim = queue_limits_start_update(ns->head->disk->queue); + queue_limits_stack_bdev(&lim, ns->disk->part0, 0, + ns->head->disk->disk_name); + ret = queue_limits_commit_update(ns->head->disk->queue, &lim); + blk_mq_unfreeze_queue(ns->head->disk->queue); } + + return ret; } #ifdef CONFIG_BLK_SED_OPAL @@ -2172,7 +2241,7 @@ static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t l cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - NVME_QID_ANY, 1, 0); + NVME_QID_ANY, NVME_SUBMIT_AT_HEAD); } static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended) @@ -2828,7 +2897,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); - subsys->dev.class = nvme_subsys_class; + subsys->dev.class = &nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; subsys->dev.groups = nvme_subsys_attrs_groups; dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance); @@ -3068,11 +3137,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct return -EINVAL; } + if (!ctrl->maxcmd) { + dev_err(ctrl->device, "Maximum outstanding commands is 0\n"); + return -EINVAL; + } + return 0; } static int nvme_init_identify(struct nvme_ctrl *ctrl) { + struct queue_limits lim; struct nvme_id_ctrl *id; u32 max_hw_sectors; bool prev_apst_enabled; @@ -3139,7 +3214,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->max_hw_sectors = min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); - nvme_set_queue_limits(ctrl, ctrl->admin_q); + lim = queue_limits_start_update(ctrl->admin_q); + nvme_set_ctrl_limits(ctrl, &lim); + ret = queue_limits_commit_update(ctrl->admin_q, &lim); + if (ret) + goto out_free; + ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); ctrl->max_namespaces = le32_to_cpu(id->mnan); @@ -3371,7 +3451,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, if (minor < 0) return minor; cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); - cdev_device->class = nvme_ns_chr_class; + cdev_device->class = &nvme_ns_chr_class; cdev_device->release = nvme_cdev_rel; device_initialize(cdev_device); cdev_init(cdev, fops); @@ -3643,7 +3723,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) if (!ns) return; - disk = blk_mq_alloc_disk(ctrl->tagset, ns); + disk = blk_mq_alloc_disk(ctrl->tagset, NULL, ns); if (IS_ERR(disk)) goto out_free_ns; disk->fops = &nvme_bdev_ops; @@ -3714,6 +3794,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) nvme_mpath_add_disk(ns, info->anagrpid); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); + /* + * Set ns->disk->device->driver_data to ns so we can access + * ns->head->passthru_err_log_enabled in + * nvme_io_passthru_err_log_enabled_[store | show](). + */ + dev_set_drvdata(disk_to_dev(ns->disk), ns); + return; out_cleanup_ns_from_list: @@ -4138,6 +4225,7 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) { struct nvme_fw_slot_info_log *log; + u8 next_fw_slot, cur_fw_slot; log = kmalloc(sizeof(*log), GFP_KERNEL); if (!log) @@ -4149,13 +4237,15 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) goto out_free_log; } - if (log->afi & 0x70 || !(log->afi & 0x7)) { + cur_fw_slot = log->afi & 0x7; + next_fw_slot = (log->afi & 0x70) >> 4; + if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) { dev_info(ctrl->device, "Firmware is activated after next Controller Level Reset\n"); goto out_free_log; } - memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1], + memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1], sizeof(ctrl->subsys->firmware_rev)); out_free_log: @@ -4294,6 +4384,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, const struct blk_mq_ops *ops, unsigned int cmd_size) { + struct queue_limits lim = {}; int ret; memset(set, 0, sizeof(*set)); @@ -4313,14 +4404,14 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; - ctrl->admin_q = blk_mq_init_queue(set); + ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL); if (IS_ERR(ctrl->admin_q)) { ret = PTR_ERR(ctrl->admin_q); goto out_free_tagset; } if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->fabrics_q = blk_mq_init_queue(set); + ctrl->fabrics_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->fabrics_q)) { ret = PTR_ERR(ctrl->fabrics_q); goto out_cleanup_admin_q; @@ -4384,7 +4475,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, return ret; if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_init_queue(set); + ctrl->connect_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->connect_q)) { ret = PTR_ERR(ctrl->connect_q); goto out_free_tag_set; @@ -4514,6 +4605,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, int ret; WRITE_ONCE(ctrl->state, NVME_CTRL_NEW); + ctrl->passthru_err_log_enabled = false; clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); @@ -4553,7 +4645,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->device = &ctrl->ctrl_device; ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt), ctrl->instance); - ctrl->device->class = nvme_class; + ctrl->device->class = &nvme_class; ctrl->device->parent = ctrl->dev; if (ops->dev_attr_groups) ctrl->device->groups = ops->dev_attr_groups; @@ -4786,42 +4878,36 @@ static int __init nvme_core_init(void) if (result < 0) goto destroy_delete_wq; - nvme_class = class_create("nvme"); - if (IS_ERR(nvme_class)) { - result = PTR_ERR(nvme_class); + result = class_register(&nvme_class); + if (result) goto unregister_chrdev; - } - nvme_class->dev_uevent = nvme_class_uevent; - nvme_subsys_class = class_create("nvme-subsystem"); - if (IS_ERR(nvme_subsys_class)) { - result = PTR_ERR(nvme_subsys_class); + result = class_register(&nvme_subsys_class); + if (result) goto destroy_class; - } result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS, "nvme-generic"); if (result < 0) goto destroy_subsys_class; - nvme_ns_chr_class = class_create("nvme-generic"); - if (IS_ERR(nvme_ns_chr_class)) { - result = PTR_ERR(nvme_ns_chr_class); + result = class_register(&nvme_ns_chr_class); + if (result) goto unregister_generic_ns; - } + result = nvme_init_auth(); if (result) goto destroy_ns_chr; return 0; destroy_ns_chr: - class_destroy(nvme_ns_chr_class); + class_unregister(&nvme_ns_chr_class); unregister_generic_ns: unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); destroy_subsys_class: - class_destroy(nvme_subsys_class); + class_unregister(&nvme_subsys_class); destroy_class: - class_destroy(nvme_class); + class_unregister(&nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_delete_wq: @@ -4837,9 +4923,9 @@ out: static void __exit nvme_core_exit(void) { nvme_exit_auth(); - class_destroy(nvme_ns_chr_class); - class_destroy(nvme_subsys_class); - class_destroy(nvme_class); + class_unregister(&nvme_ns_chr_class); + class_unregister(&nvme_subsys_class); + class_unregister(&nvme_class); unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_workqueue(nvme_delete_wq); @@ -4851,5 +4937,6 @@ static void __exit nvme_core_exit(void) MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host core framework"); module_init(nvme_core_init); module_exit(nvme_core_exit); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index b5752a77ad98..1f0ea1f32d22 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -450,8 +450,10 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, - data, sizeof(*data), NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), NVME_QID_ANY, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_NOWAIT | + NVME_SUBMIT_RESERVED); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -525,11 +527,14 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, - data, sizeof(*data), qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), qid, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_RESERVED | + NVME_SUBMIT_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); + goto out_free_data; } result = le32_to_cpu(res.u32); if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { @@ -633,7 +638,7 @@ static struct key *nvmf_parse_key(int key_id) } key = key_lookup(key_id); - if (!IS_ERR(key)) + if (IS_ERR(key)) pr_err("key id %08x not found\n", key_id); else pr_debug("Using key id %08x\n", key_id); @@ -1314,7 +1319,10 @@ out_free_opts: return ERR_PTR(ret); } -static struct class *nvmf_class; +static const struct class nvmf_class = { + .name = "nvme-fabrics", +}; + static struct device *nvmf_device; static DEFINE_MUTEX(nvmf_dev_mutex); @@ -1434,15 +1442,14 @@ static int __init nvmf_init(void) if (!nvmf_default_host) return -ENOMEM; - nvmf_class = class_create("nvme-fabrics"); - if (IS_ERR(nvmf_class)) { + ret = class_register(&nvmf_class); + if (ret) { pr_err("couldn't register class nvme-fabrics\n"); - ret = PTR_ERR(nvmf_class); goto out_free_host; } nvmf_device = - device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); + device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); if (IS_ERR(nvmf_device)) { pr_err("couldn't create nvme-fabrics device!\n"); ret = PTR_ERR(nvmf_device); @@ -1458,9 +1465,9 @@ static int __init nvmf_init(void) return 0; out_destroy_device: - device_destroy(nvmf_class, MKDEV(0, 0)); + device_destroy(&nvmf_class, MKDEV(0, 0)); out_destroy_class: - class_destroy(nvmf_class); + class_unregister(&nvmf_class); out_free_host: nvmf_host_put(nvmf_default_host); return ret; @@ -1469,8 +1476,8 @@ out_free_host: static void __exit nvmf_exit(void) { misc_deregister(&nvmf_misc); - device_destroy(nvmf_class, MKDEV(0, 0)); - class_destroy(nvmf_class); + device_destroy(&nvmf_class, MKDEV(0, 0)); + class_unregister(&nvmf_class); nvmf_host_put(nvmf_default_host); BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); @@ -1488,6 +1495,7 @@ static void __exit nvmf_exit(void) } MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("NVMe host fabrics library"); module_init(nvmf_init); module_exit(nvmf_exit); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index fbaee5a7be19..06cc54851b1b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -185,9 +185,11 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (ctrl->state == NVME_CTRL_DELETING || - ctrl->state == NVME_CTRL_DELETING_NOIO || - ctrl->state == NVME_CTRL_DEAD || + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (state == NVME_CTRL_DELETING || + state == NVME_CTRL_DELETING_NOIO || + state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || !uuid_equal(&opts->host->id, &ctrl->opts->host->id)) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 16847a316421..68a5d971657b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -2574,6 +2567,7 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; + u16 qnum = op->queue->qnum; struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; @@ -2582,10 +2576,11 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) * will detect the aborted io and will fail the connection. */ dev_info(ctrl->ctrl.device, - "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: " "x%08x/x%08x\n", - ctrl->cnum, op->queue->qnum, sqe->common.opcode, - sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); + ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype, + nvme_fabrics_opcode_str(qnum, sqe), + sqe->common.cdw10, sqe->common.cdw11); if (__nvme_fc_abort_op(ctrl, op)) nvme_fc_error_recovery(ctrl, "io timeout abort failed"); @@ -3575,8 +3570,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, flush_delayed_work(&ctrl->connect_work); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); + "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n", + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn); return &ctrl->ctrl; @@ -3894,10 +3889,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3915,7 +3906,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3939,8 +3930,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3960,48 +3949,27 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); module_exit(nvme_fc_exit_module); +MODULE_DESCRIPTION("NVMe host FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 18f5c1be5d67..3dfd5ae99ae0 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -228,7 +228,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->head->lba_shift; if ((io.control & NVME_RW_PRINFO_PRACT) && - ns->head->ms == sizeof(struct t10_pi_tuple)) { + (ns->head->ms == ns->head->pi_size)) { /* * Protection information is stripped/inserted by the * controller. diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2dd4137a08b2..5397fb428b24 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -156,7 +156,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) if (!ns->head->disk) continue; kblockd_schedule_work(&ns->head->requeue_work); - if (ctrl->state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) disk_uevent(ns->head->disk, KOBJ_CHANGE); } up_read(&ctrl->namespaces_rwsem); @@ -223,13 +223,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) static bool nvme_path_is_disabled(struct nvme_ns *ns) { + enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl); + /* * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should * still be able to complete assuming that the controller is connected. * Otherwise it will fail immediately and return to the requeue list. */ - if (ns->ctrl->state != NVME_CTRL_LIVE && - ns->ctrl->state != NVME_CTRL_DELETING) + if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING) return true; if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || !test_bit(NVME_NS_READY, &ns->flags)) @@ -331,7 +332,7 @@ out: static inline bool nvme_path_is_optimized(struct nvme_ns *ns) { - return ns->ctrl->state == NVME_CTRL_LIVE && + return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE && ns->ana_state == NVME_ANA_OPTIMIZED; } @@ -358,7 +359,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) list_for_each_entry_rcu(ns, &head->list, siblings) { if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) continue; - switch (ns->ctrl->state) { + switch (nvme_ctrl_state(ns->ctrl)) { case NVME_CTRL_LIVE: case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: @@ -515,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work) int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) { + struct queue_limits lim; bool vwc = false; mutex_init(&head->lock); @@ -531,9 +533,14 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; - head->disk = blk_alloc_disk(ctrl->numa_node); - if (!head->disk) - return -ENOMEM; + blk_set_stacking_limits(&lim); + lim.dma_alignment = 3; + if (head->ids.csi != NVME_CSI_ZNS) + lim.max_zone_append_sectors = 0; + + head->disk = blk_alloc_disk(&lim, ctrl->numa_node); + if (IS_ERR(head->disk)) + return PTR_ERR(head->disk); head->disk->fops = &nvme_ns_head_ops; head->disk->private_data = head; sprintf(head->disk->disk_name, "nvme%dn%d", @@ -552,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); - /* set to a default value of 512 until the disk is validated */ - blk_queue_logical_block_size(head->disk->queue, 512); - blk_set_stacking_limits(&head->disk->queue->limits); - blk_queue_dma_alignment(head->disk->queue, 3); - /* we need to propagate up the VMC settings */ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; @@ -667,7 +669,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, * controller is ready. */ if (nvme_state_is_live(ns->ana_state) && - ns->ctrl->state == NVME_CTRL_LIVE) + nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -748,7 +750,7 @@ static void nvme_ana_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; nvme_read_ana_log(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 030c80818240..24193fcb8bd5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -263,6 +263,7 @@ enum nvme_ctrl_flags { struct nvme_ctrl { bool comp_seen; bool identified; + bool passthru_err_log_enabled; enum nvme_ctrl_state state; spinlock_t lock; struct mutex scan_lock; @@ -454,6 +455,7 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; bool shared; + bool passthru_err_log_enabled; int instance; struct nvme_effects_log *effects; u64 nuse; @@ -462,6 +464,7 @@ struct nvme_ns_head { u16 ms; u16 pi_size; u8 pi_type; + u8 pi_offset; u8 guard_type; u16 sgs; u32 sws; @@ -522,7 +525,6 @@ struct nvme_ns { struct device cdev_device; struct nvme_fault_inject fault_inject; - }; /* NVMe ns supports metadata actions by the controller (generate/strip) */ @@ -805,17 +807,18 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *req); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live); + bool queue_live, enum nvme_ctrl_state state); static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) { - if (likely(ctrl->state == NVME_CTRL_LIVE)) + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (likely(state == NVME_CTRL_LIVE)) return true; - if (ctrl->ops->flags & NVME_F_FABRICS && - ctrl->state == NVME_CTRL_DELETING) + if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING) return queue_live; - return __nvme_check_ready(ctrl, rq, queue_live); + return __nvme_check_ready(ctrl, rq, queue_live, state); } /* @@ -836,12 +839,27 @@ static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); } +/* + * Flags for __nvme_submit_sync_cmd() + */ +typedef __u32 __bitwise nvme_submit_flags_t; + +enum { + /* Insert request at the head of the queue */ + NVME_SUBMIT_AT_HEAD = (__force nvme_submit_flags_t)(1 << 0), + /* Set BLK_MQ_REQ_NOWAIT when allocating request */ + NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1), + /* Set BLK_MQ_REQ_RESERVED when allocating request */ + NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2), + /* Retry command when NVME_SC_DNR is not set in the result */ + NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3), +}; + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, - blk_mq_req_flags_t flags); + int qid, nvme_submit_flags_t flags); int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result); @@ -1018,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) } #endif /* CONFIG_NVME_MULTIPATH */ -int nvme_revalidate_zones(struct nvme_ns *ns); int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct queue_limits *lim); #ifdef CONFIG_BLK_DEV_ZONED -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf); blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd, enum nvme_zone_mgmt_action action); @@ -1033,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, { return BLK_STS_NOTSUPP; } - -static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) -{ - dev_warn(ns->ctrl->device, - "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n"); - return -EPROTONOSUPPORT; -} #endif static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) @@ -1124,35 +1135,42 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) } #ifdef CONFIG_NVME_VERBOSE_ERRORS -const unsigned char *nvme_get_error_status_str(u16 status); -const unsigned char *nvme_get_opcode_str(u8 opcode); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode); +const char *nvme_get_error_status_str(u16 status); +const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); #else /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_get_error_status_str(u16 status) +static inline const char *nvme_get_error_status_str(u16 status) { return "I/O Error"; } -static inline const unsigned char *nvme_get_opcode_str(u8 opcode) +static inline const char *nvme_get_opcode_str(u8 opcode) { return "I/O Cmd"; } -static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +static inline const char *nvme_get_admin_opcode_str(u8 opcode) { return "Admin Cmd"; } -static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) { return "Fabrics Cmd"; } #endif /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +static inline const char *nvme_opcode_str(int qid, u8 opcode) { - if (opcode == nvme_fabrics_command) - return nvme_get_fabrics_opcode_str(fctype); return qid ? nvme_get_opcode_str(opcode) : nvme_get_admin_opcode_str(opcode); } + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c1d6357ec98a..e6267a6aa380 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1349,7 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) dev_warn(dev->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", req->tag, nvme_cid(req), opcode, - nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); + nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } @@ -3543,5 +3543,6 @@ static void __exit nvme_exit(void) MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host PCIe transport driver"); module_init(nvme_init); module_exit(nvme_exit); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 11dde0d83044..366f0bb4ebfc 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) { int ret; bool changed; + u16 max_queue_size; ret = nvme_rdma_configure_admin_queue(ctrl, new); if (ret) @@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); } - if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { + if (ctrl->ctrl.max_integrity_segments) + max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE; + else + max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE; + + if (ctrl->ctrl.sqsize + 1 > max_queue_size) { dev_warn(ctrl->ctrl.device, - "ctrl sqsize %u > max queue size %u, clamping down\n", - ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); - ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; + "ctrl sqsize %u > max queue size %u, clamping down\n", + ctrl->ctrl.sqsize + 1, max_queue_size); + ctrl->ctrl.sqsize = max_queue_size - 1; } if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { @@ -1410,6 +1416,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, struct nvme_ns *ns = rq->q->queuedata; struct bio *bio = rq->bio; struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + u32 xfer_len; int nr; req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); @@ -1422,8 +1430,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, if (unlikely(nr)) goto mr_put; - nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, - req->mr->sig_attrs, ns->head->pi_type); + nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type); nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); @@ -1441,7 +1448,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_WRITE; sg->addr = cpu_to_le64(req->mr->iova); - put_unaligned_le24(req->mr->length, sg->length); + xfer_len = req->mr->length; + /* Check if PI is added by the HW */ + if (!pi_count) + xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size; + put_unaligned_le24(xfer_len, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; @@ -1946,14 +1957,13 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - u8 opcode = req->req.cmd->common.opcode; - u8 fctype = req->req.cmd->fabrics.fctype; + struct nvme_command *cmd = req->req.cmd; int qid = nvme_rdma_queue_idx(queue); dev_warn(ctrl->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), opcode, - nvme_opcode_str(qid, opcode, fctype), qid); + rq->tag, nvme_cid(rq), cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* @@ -2296,8 +2306,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); @@ -2400,4 +2410,5 @@ static void __exit nvme_rdma_cleanup_module(void) module_init(nvme_rdma_init_module); module_exit(nvme_rdma_cleanup_module); +MODULE_DESCRIPTION("NVMe host RDMA transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 754e91111042..09fcaa519e5b 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -35,6 +35,31 @@ static ssize_t nvme_sysfs_rescan(struct device *dev, } static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); +static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, + ctrl->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + bool passthru_err_log_enabled; + int err; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + + ctrl->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); @@ -44,6 +69,37 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) return nvme_get_ns_from_dev(dev)->head; } +static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns_head *head = dev_to_ns_head(dev); + + return sysfs_emit(buf, head->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ns_head *head = dev_to_ns_head(dev); + bool passthru_err_log_enabled; + int err; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + head->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + +static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store); + +static struct device_attribute dev_attr_io_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store); + static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -165,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns) ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id); if (ret) - goto out_free_id; + return ret; ns->head->nuse = le64_to_cpu(id->nuse); - -out_free_id: kfree(id); - - return ret; + return 0; } static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, @@ -208,6 +261,7 @@ static struct attribute *nvme_ns_attrs[] = { &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, #endif + &dev_attr_io_passthru_err_log_enabled.attr, NULL, }; @@ -311,6 +365,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, char *buf) { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned state = (unsigned)nvme_ctrl_state(ctrl); static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", @@ -321,9 +376,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, [NVME_CTRL_DEAD] = "dead", }; - if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && - state_name[ctrl->state]) - return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); + if (state < ARRAY_SIZE(state_name) && state_name[state]) + return sysfs_emit(buf, "%s\n", state_name[state]); return sysfs_emit(buf, "unknown state\n"); } @@ -655,6 +709,7 @@ static struct attribute *nvme_dev_attrs[] = { #ifdef CONFIG_NVME_TCP_TLS &dev_attr_tls_key.attr, #endif + &dev_attr_adm_passthru_err_log_enabled.attr, NULL }; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d058d990532b..3692b56cb58d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1344,7 +1344,6 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) { - struct page *page; struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; unsigned int noreclaim_flag; @@ -1355,11 +1354,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) if (queue->hdr_digest || queue->data_digest) nvme_tcp_free_crypto(queue); - if (queue->pf_cache.va) { - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); - queue->pf_cache.va = NULL; - } + page_frag_cache_drain(&queue->pf_cache); noreclaim_flag = memalloc_noreclaim_save(); /* ->sock will be released by fput() */ @@ -2428,13 +2423,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); - u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + struct nvme_command *cmd = &pdu->cmd; int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), pdu->hdr.type, opc, - nvme_opcode_str(qid, opc, fctype), qid); + rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* @@ -2753,8 +2748,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); @@ -2826,4 +2821,5 @@ static void __exit nvme_tcp_cleanup_module(void) module_init(nvme_tcp_init_module); module_exit(nvme_tcp_cleanup_module); +MODULE_DESCRIPTION("NVMe host TCP transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 499bbb0eee8d..722384bcc765 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -7,16 +7,6 @@ #include <linux/vmalloc.h> #include "nvme.h" -int nvme_revalidate_zones(struct nvme_ns *ns) -{ - struct request_queue *q = ns->queue; - - blk_queue_chunk_sectors(q, ns->head->zsze); - blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); - - return blk_revalidate_disk_zones(ns->disk, NULL); -} - static int nvme_set_max_append(struct nvme_ctrl *ctrl) { struct nvme_command c = { }; @@ -45,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) return 0; } -int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) +int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf, + struct queue_limits *lim) { struct nvme_effects_log *log = ns->head->effects; - struct request_queue *q = ns->queue; struct nvme_command c = { }; struct nvme_id_ns_zns *id; int status; @@ -109,10 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - disk_set_zoned(ns->disk); - blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); - disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); - disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1); + blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue); + lim->zoned = 1; + lim->max_open_zones = le32_to_cpu(id->mor) + 1; + lim->max_active_zones = le32_to_cpu(id->mar) + 1; + lim->chunk_sectors = ns->head->zsze; + lim->max_zone_append_sectors = ns->ctrl->max_zone_append; free_data: kfree(id); return status; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 39cb570f833d..f5b7054a4a05 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->cqes = (0x4 << 4) | 0x4; /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 2482a0db2504..77a6e817b315 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +static ssize_t nvmet_param_max_queue_size_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size); +} + +static ssize_t nvmet_param_max_queue_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + int ret; + + if (nvmet_is_port_enabled(port, __func__)) + return -EACCES; + ret = kstrtoint(page, 0, &port->max_queue_size); + if (ret) { + pr_err("Invalid value '%s' for max_queue_size\n", page); + return -EINVAL; + } + return count; +} + +CONFIGFS_ATTR(nvmet_, param_max_queue_size); + #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_param_pi_enable_show(struct config_item *item, char *page) @@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_trtype, &nvmet_attr_addr_tsas, &nvmet_attr_param_inline_data_size, + &nvmet_attr_param_max_queue_size, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_attr_param_pi_enable, #endif @@ -1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ + port->max_queue_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index d26aa30f8702..6bbe4df0166c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -248,7 +248,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS); } @@ -265,7 +265,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, continue; if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); } mutex_unlock(&subsys->lock); @@ -358,6 +358,18 @@ int nvmet_enable_port(struct nvmet_port *port) if (port->inline_data_size < 0) port->inline_data_size = 0; + /* + * If the transport didn't set the max_queue_size properly, then clamp + * it to the target limits. Also set default values in case the + * transport didn't set it at all. + */ + if (port->max_queue_size < 0) + port->max_queue_size = NVMET_MAX_QUEUE_SIZE; + else + port->max_queue_size = clamp_t(int, port->max_queue_size, + NVMET_MIN_QUEUE_SIZE, + NVMET_MAX_QUEUE_SIZE); + port->enabled = true; port->tr_ops = ops; return 0; @@ -1223,9 +1235,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ctrl->cap |= (15ULL << 24); /* maximum queue entries supported: */ if (ctrl->ops->get_max_queue_size) - ctrl->cap |= ctrl->ops->get_max_queue_size(ctrl) - 1; + ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl), + ctrl->port->max_queue_size) - 1; else - ctrl->cap |= NVMET_QUEUE_SIZE - 1; + ctrl->cap |= ctrl->port->max_queue_size - 1; if (nvmet_is_passthru_subsys(ctrl->subsys)) nvmet_passthrough_override_cap(ctrl); @@ -1411,6 +1424,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; nvmet_init_cap(ctrl); WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); @@ -1705,4 +1719,5 @@ static void __exit nvmet_exit(void) module_init(nvmet_init); module_exit(nvmet_exit); +MODULE_DESCRIPTION("NVMe target core framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 668d257fa986..ce54da8c6b36 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -21,7 +21,7 @@ static void __nvmet_disc_changed(struct nvmet_port *port, if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE)) return; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC); } @@ -282,7 +282,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) id->lpa = (1 << 2); /* no enforcement soft-limit for maxcmd - pick arbitrary high value */ - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ if (ctrl->ops->flags & NVMF_KEYED_SGLS) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d8da840a1c0e..b23f4cf840bd 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -157,7 +157,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } - if (sqsize > mqes) { + /* for fabrics, this value applies to only the I/O Submission Queues */ + if (qid && sqsize > mqes) { pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n", sqsize, mqes, ctrl->cntlid); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); @@ -209,7 +210,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmf_connect_command *c = &req->cmd->connect; struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl = NULL; - u16 status = 0; + u16 status; int ret; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) @@ -251,8 +252,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) if (status) goto out; - ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; - uuid_copy(&ctrl->hostid, &d->hostid); ret = nvmet_setup_auth(ctrl); @@ -290,7 +289,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl; u16 qid = le16_to_cpu(c->qid); - u16 status = 0; + u16 status; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index bda7a3009e85..fd229f310c93 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -145,7 +147,6 @@ struct nvmet_fc_tgt_queue { struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; - struct rcu_head rcu; /* array of fcp_iods */ struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); } __aligned(sizeof(unsigned long long)); @@ -166,10 +167,9 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; - struct rcu_head rcu; }; @@ -249,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -360,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -373,7 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -489,8 +497,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) * message is normal. Otherwise, send unless the hostport has * already been invalidated by the lldd. */ - if (!tgtport->ops->ls_req || !assoc->hostport || - assoc->hostport->invalid) + if (!tgtport->ops->ls_req || assoc->hostport->invalid) return; lsop = kzalloc((sizeof(*lsop) + @@ -802,14 +809,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -831,15 +835,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -852,15 +854,11 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); - kfree_rcu(queue, rcu); + kfree(queue); } static void @@ -969,7 +967,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1078,8 +1076,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; @@ -1094,23 +1090,54 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); +} + +static bool +nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id) +{ + struct nvmet_fc_tgt_assoc *a; + + list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) { + if (association_id == a->association_id) + return true; + } + + return false; } static struct nvmet_fc_tgt_assoc * nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { - struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; + struct nvmet_fc_tgt_assoc *assoc; unsigned long flags; + bool done; u64 ran; int idx; - bool needrandom = true; + + if (!tgtport->pe) + return NULL; assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) @@ -1120,43 +1147,35 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) if (idx < 0) goto out_free_assoc; - if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida; - assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); if (IS_ERR(assoc->hostport)) - goto out_put; + goto out_ida; assoc->tgtport = tgtport; assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); - while (needrandom) { + done = false; + do { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); ran = ran << BYTES_FOR_QID_SHIFT; spin_lock_irqsave(&tgtport->lock, flags); - needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { - if (ran == tmpassoc->association_id) { - needrandom = true; - break; - } - } - if (!needrandom) { + rcu_read_lock(); + if (!nvmet_fc_assoc_exits(tgtport, ran)) { assoc->association_id = ran; list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); + done = true; } + rcu_read_unlock(); spin_unlock_irqrestore(&tgtport->lock, flags); - } + } while (!done); return assoc; -out_put: - nvmet_fc_tgtport_put(tgtport); out_ida: ida_free(&tgtport->assoc_cnt, idx); out_free_assoc: @@ -1172,13 +1191,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1188,8 +1212,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - kfree_rcu(assoc, rcu); - nvmet_fc_tgtport_put(tgtport); + kfree(assoc); } static void @@ -1208,7 +1231,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1217,29 +1240,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1415,6 +1430,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1492,9 +1508,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1540,16 +1555,14 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { - if (!assoc->hostport || - assoc->hostport->hosthandle != hosthandle) + if (assoc->hostport->hosthandle != hosthandle) continue; if (!nvmet_fc_tgt_a_get(assoc)) continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1594,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1593,9 +1606,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1625,6 +1637,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1870,9 +1884,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1887,8 +1898,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1904,6 +1913,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + nvmet_fc_schedule_delete_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2540,8 +2552,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); @@ -2902,6 +2915,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2933,6 +2949,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); @@ -2945,4 +2964,5 @@ static void __exit nvmet_fc_exit_module(void) module_init(nvmet_fc_init_module); module_exit(nvmet_fc_exit_module); +MODULE_DESCRIPTION("NVMe target FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index ead349af30f1..913cd2ec7a6f 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; @@ -1556,7 +1556,9 @@ static const struct attribute_group *fcloop_dev_attr_groups[] = { NULL, }; -static struct class *fcloop_class; +static const struct class fcloop_class = { + .name = "fcloop", +}; static struct device *fcloop_device; @@ -1564,15 +1566,14 @@ static int __init fcloop_init(void) { int ret; - fcloop_class = class_create("fcloop"); - if (IS_ERR(fcloop_class)) { + ret = class_register(&fcloop_class); + if (ret) { pr_err("couldn't register class fcloop\n"); - ret = PTR_ERR(fcloop_class); return ret; } fcloop_device = device_create_with_groups( - fcloop_class, NULL, MKDEV(0, 0), NULL, + &fcloop_class, NULL, MKDEV(0, 0), NULL, fcloop_dev_attr_groups, "ctl"); if (IS_ERR(fcloop_device)) { pr_err("couldn't create ctl device!\n"); @@ -1585,7 +1586,7 @@ static int __init fcloop_init(void) return 0; out_destroy_class: - class_destroy(fcloop_class); + class_unregister(&fcloop_class); return ret; } @@ -1643,11 +1644,12 @@ static void __exit fcloop_exit(void) put_device(fcloop_device); - device_destroy(fcloop_class, MKDEV(0, 0)); - class_destroy(fcloop_class); + device_destroy(&fcloop_class, MKDEV(0, 0)); + class_unregister(&fcloop_class); } module_init(fcloop_init); module_exit(fcloop_exit); +MODULE_DESCRIPTION("NVMe target FC loop transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index f11400a908f2..6426aac2634a 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -50,10 +50,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) void nvmet_bdev_ns_disable(struct nvmet_ns *ns) { - if (ns->bdev_handle) { - bdev_release(ns->bdev_handle); + if (ns->bdev_file) { + fput(ns->bdev_file); ns->bdev = NULL; - ns->bdev_handle = NULL; + ns->bdev_file = NULL; } } @@ -85,18 +85,18 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) if (ns->buffered_io) return -ENOTBLK; - ns->bdev_handle = bdev_open_by_path(ns->device_path, + ns->bdev_file = bdev_file_open_by_path(ns->device_path, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, NULL); - if (IS_ERR(ns->bdev_handle)) { - ret = PTR_ERR(ns->bdev_handle); + if (IS_ERR(ns->bdev_file)) { + ret = PTR_ERR(ns->bdev_file); if (ret != -ENOTBLK) { pr_err("failed to open block device %s: (%d)\n", ns->device_path, ret); } - ns->bdev_handle = NULL; + ns->bdev_file = NULL; return ret; } - ns->bdev = ns->bdev_handle->bdev; + ns->bdev = file_bdev(ns->bdev_file); ns->size = bdev_nr_bytes(ns->bdev); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9cb434c58075..e589915ddef8 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -400,7 +400,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) } nvme_quiesce_admin_queue(&ctrl->ctrl); - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE) nvme_disable_ctrl(&ctrl->ctrl, true); nvme_cancel_admin_tagset(&ctrl->ctrl); @@ -434,8 +434,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_loop_shutdown_ctrl(ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - if (ctrl->ctrl.state != NVME_CTRL_DELETING && - ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO) + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + if (state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO) /* state change failure for non-deleted ctrl? */ WARN_ON_ONCE(1); return; @@ -688,5 +690,6 @@ static void __exit nvme_loop_cleanup_module(void) module_init(nvme_loop_init_module); module_exit(nvme_loop_cleanup_module); +MODULE_DESCRIPTION("NVMe target loop transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */ diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6c8acebe1a1a..f460728e1df1 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -58,7 +58,7 @@ struct nvmet_ns { struct percpu_ref ref; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct block_device *bdev; struct file *file; bool readonly; @@ -163,6 +163,7 @@ struct nvmet_port { void *priv; bool enabled; int inline_data_size; + int max_queue_size; const struct nvmet_fabrics_ops *tr_ops; bool pi_enable; }; @@ -543,9 +544,10 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); -#define NVMET_QUEUE_SIZE 1024 +#define NVMET_MIN_QUEUE_SIZE 16 +#define NVMET_MAX_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 -#define NVMET_MAX_CMD NVMET_QUEUE_SIZE +#define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1) /* * Nice round number that makes a list of nsids fit into a page. diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index f2d963e1fe94..bb4a69d538fd 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -132,7 +132,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes); id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes); - id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); + id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl)); /* don't support fuse commands */ id->fuses = 0; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 667f9c04f35d..f2bb9d95ecf4 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1956,6 +1956,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport) nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; } + if (nport->max_queue_size < 0) { + nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE; + } else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) { + pr_warn("max_queue_size %u is too large, reducing to %u\n", + nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE); + nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE; + } + ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, nport->disc_addr.trsvcid, &port->addr); if (ret) { @@ -2015,6 +2023,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) { + if (ctrl->pi_support) + return NVME_RDMA_MAX_METADATA_QUEUE_SIZE; return NVME_RDMA_MAX_QUEUE_SIZE; } @@ -2104,5 +2114,6 @@ static void __exit nvmet_rdma_exit(void) module_init(nvmet_rdma_init); module_exit(nvmet_rdma_exit); +MODULE_DESCRIPTION("NVMe target RDMA transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 6a1e6bb80062..2aa5762e9f50 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1591,7 +1591,6 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { - struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1615,8 +1614,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) if (queue->hdr_digest || queue->data_digest) nvmet_tcp_free_crypto(queue); ida_free(&nvmet_tcp_queue_ida, queue->idx); - page = virt_to_head_page(queue->pf_cache.va); - __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); + page_frag_cache_drain(&queue->pf_cache); kfree(queue); } @@ -2216,10 +2214,12 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); module_exit(nvmet_tcp_exit); +MODULE_DESCRIPTION("NVMe target TCP transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 5b5c1e481722..3148d9f1bde6 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -456,8 +456,7 @@ static u16 nvmet_bdev_execute_zmgmt_send_all(struct nvmet_req *req) switch (zsa_req_op(req->cmd->zms.zsa)) { case REQ_OP_ZONE_RESET: ret = blkdev_zone_mgmt(req->ns->bdev, REQ_OP_ZONE_RESET, 0, - get_capacity(req->ns->bdev->bd_disk), - GFP_KERNEL); + get_capacity(req->ns->bdev->bd_disk)); if (ret < 0) return blkdev_zone_mgmt_errno_to_nvme_status(ret); break; @@ -508,7 +507,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w) goto out; } - ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors, GFP_KERNEL); + ret = blkdev_zone_mgmt(bdev, op, sect, zone_sectors); if (ret < 0) status = blkdev_zone_mgmt_errno_to_nvme_status(ret); |