diff options
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/Makefile | 2 | ||||
-rw-r--r-- | drivers/nvme/host/auth.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/constants.c | 4 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 915 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 241 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 21 | ||||
-rw-r--r-- | drivers/nvme/host/fault_inject.c | 2 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 45 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 142 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 16 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 23 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 39 | ||||
-rw-r--r-- | drivers/nvme/host/pr.c | 315 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 81 | ||||
-rw-r--r-- | drivers/nvme/host/sysfs.c | 668 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 141 | ||||
-rw-r--r-- | drivers/nvme/host/zns.c | 9 |
17 files changed, 1471 insertions, 1199 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index e27202d22c7d..c7c3cf202d12 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o obj-$(CONFIG_NVME_TCP) += nvme-tcp.o obj-$(CONFIG_NVME_APPLE) += nvme-apple.o -nvme-core-y += core.o ioctl.o +nvme-core-y += core.o ioctl.o sysfs.o pr.o nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index ea16a0aba679..daf5d144a8ea 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -30,18 +30,18 @@ struct nvme_dhchap_queue_context { u32 s2; u16 transaction; u8 status; + u8 dhgroup_id; u8 hash_id; size_t hash_len; - u8 dhgroup_id; u8 c1[64]; u8 c2[64]; u8 response[64]; u8 *host_response; u8 *ctrl_key; - int ctrl_key_len; u8 *host_key; - int host_key_len; u8 *sess_key; + int ctrl_key_len; + int host_key_len; int sess_key_len; }; diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index bc523ca02254..20f46c230885 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -12,7 +12,7 @@ static const char * const nvme_ops[] = { [nvme_cmd_read] = "Read", [nvme_cmd_write_uncor] = "Write Uncorrectable", [nvme_cmd_compare] = "Compare", - [nvme_cmd_write_zeroes] = "Write Zeros", + [nvme_cmd_write_zeroes] = "Write Zeroes", [nvme_cmd_dsm] = "Dataset Management", [nvme_cmd_verify] = "Verify", [nvme_cmd_resv_register] = "Reservation Register", @@ -21,7 +21,7 @@ static const char * const nvme_ops[] = { [nvme_cmd_resv_release] = "Reservation Release", [nvme_cmd_zone_mgmt_send] = "Zone Management Send", [nvme_cmd_zone_mgmt_recv] = "Zone Management Receive", - [nvme_cmd_zone_append] = "Zone Management Append", + [nvme_cmd_zone_append] = "Zone Append", }; static const char * const nvme_admin_ops[] = { diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1f0cbb77b249..37b6fa746662 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -237,7 +237,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_delete_ctrl); -static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) +void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) { /* * Keep a reference until nvme_do_delete_ctrl() complete, @@ -279,7 +279,7 @@ static blk_status_t nvme_error_status(u16 status) case NVME_SC_INVALID_PI: return BLK_STS_PROTECTION; case NVME_SC_RESERVATION_CONFLICT: - return BLK_STS_NEXUS; + return BLK_STS_RESV_CONFLICT; case NVME_SC_HOST_PATH_ERROR: return BLK_STS_TRANSPORT; case NVME_SC_ZONE_TOO_MANY_ACTIVE: @@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req) trace_nvme_complete_rq(req); nvme_cleanup_cmd(req); - if (ctrl->kas) + /* + * Completions of long-running commands should not be able to + * defer sending of periodic keep alives, since the controller + * may have completed processing such commands a long time ago + * (arbitrarily close to command submission time). + * req->deadline - req->timeout is the command submission time + * in jiffies. + */ + if (ctrl->kas && + req->deadline - req->timeout >= ctrl->ka_last_check_time) ctrl->comp_seen = true; switch (nvme_decide_disposition(req)) { @@ -1115,7 +1124,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) } EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { @@ -1125,13 +1134,18 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) { - dev_info(ctrl->device, + if (!test_and_set_bit(NVME_CTRL_DIRTY_CAPABILITY, + &ctrl->flags)) { + dev_info(ctrl->device, "controller capabilities changed, reset may be required to take effect.\n"); + } } if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + if (ns) + return; switch (cmd->common.opcode) { case nvme_admin_set_features: @@ -1161,9 +1175,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); * The host should send Keep Alive commands at half of the Keep Alive Timeout * accounting for transport roundtrip times [..]. */ +static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl) +{ + unsigned long delay = ctrl->kato * HZ / 2; + + /* + * When using Traffic Based Keep Alive, we need to run + * nvme_keep_alive_work at twice the normal frequency, as one + * command completion can postpone sending a keep alive command + * by up to twice the delay between runs. + */ + if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) + delay /= 2; + return delay; +} + static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) { - queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); + queue_delayed_work(nvme_wq, &ctrl->ka_work, + nvme_keep_alive_work_period(ctrl)); } static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, @@ -1172,6 +1202,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long flags; bool startka = false; + unsigned long rtt = jiffies - (rq->deadline - rq->timeout); + unsigned long delay = nvme_keep_alive_work_period(ctrl); + + /* + * Subtract off the keepalive RTT so nvme_keep_alive_work runs + * at the desired frequency. + */ + if (rtt <= delay) { + delay -= rtt; + } else { + dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n", + jiffies_to_msecs(rtt)); + delay = 0; + } blk_mq_free_request(rq); @@ -1182,6 +1226,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, return RQ_END_IO_NONE; } + ctrl->ka_last_check_time = jiffies; ctrl->comp_seen = false; spin_lock_irqsave(&ctrl->lock, flags); if (ctrl->state == NVME_CTRL_LIVE || @@ -1189,7 +1234,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, startka = true; spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) - nvme_queue_keep_alive_work(ctrl); + queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); return RQ_END_IO_NONE; } @@ -1200,6 +1245,8 @@ static void nvme_keep_alive_work(struct work_struct *work) bool comp_seen = ctrl->comp_seen; struct request *rq; + ctrl->ka_last_check_time = jiffies; + if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { dev_dbg(ctrl->device, "reschedule traffic based keep-alive timer\n"); @@ -1591,12 +1638,12 @@ static void nvme_ns_release(struct nvme_ns *ns) nvme_put_ns(ns); } -static int nvme_open(struct block_device *bdev, fmode_t mode) +static int nvme_open(struct gendisk *disk, blk_mode_t mode) { - return nvme_ns_open(bdev->bd_disk->private_data); + return nvme_ns_open(disk->private_data); } -static void nvme_release(struct gendisk *disk, fmode_t mode) +static void nvme_release(struct gendisk *disk) { nvme_ns_release(disk->private_data); } @@ -1835,7 +1882,7 @@ static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); - unsigned short bs = 1 << ns->lba_shift; + u32 bs = 1U << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; /* @@ -2061,153 +2108,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) } } -static char nvme_pr_type(enum pr_type type) -{ - switch (type) { - case PR_WRITE_EXCLUSIVE: - return 1; - case PR_EXCLUSIVE_ACCESS: - return 2; - case PR_WRITE_EXCLUSIVE_REG_ONLY: - return 3; - case PR_EXCLUSIVE_ACCESS_REG_ONLY: - return 4; - case PR_WRITE_EXCLUSIVE_ALL_REGS: - return 5; - case PR_EXCLUSIVE_ACCESS_ALL_REGS: - return 6; - default: - return 0; - } -} - -static int nvme_send_ns_head_pr_command(struct block_device *bdev, - struct nvme_command *c, u8 data[16]) -{ - struct nvme_ns_head *head = bdev->bd_disk->private_data; - int srcu_idx = srcu_read_lock(&head->srcu); - struct nvme_ns *ns = nvme_find_path(head); - int ret = -EWOULDBLOCK; - - if (ns) { - c->common.nsid = cpu_to_le32(ns->head->ns_id); - ret = nvme_submit_sync_cmd(ns->queue, c, data, 16); - } - srcu_read_unlock(&head->srcu, srcu_idx); - return ret; -} - -static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c, - u8 data[16]) -{ - c->common.nsid = cpu_to_le32(ns->head->ns_id); - return nvme_submit_sync_cmd(ns->queue, c, data, 16); -} - -static int nvme_sc_to_pr_err(int nvme_sc) -{ - if (nvme_is_path_error(nvme_sc)) - return PR_STS_PATH_FAILED; - - switch (nvme_sc) { - case NVME_SC_SUCCESS: - return PR_STS_SUCCESS; - case NVME_SC_RESERVATION_CONFLICT: - return PR_STS_RESERVATION_CONFLICT; - case NVME_SC_ONCS_NOT_SUPPORTED: - return -EOPNOTSUPP; - case NVME_SC_BAD_ATTRIBUTES: - case NVME_SC_INVALID_OPCODE: - case NVME_SC_INVALID_FIELD: - case NVME_SC_INVALID_NS: - return -EINVAL; - default: - return PR_STS_IOERR; - } -} - -static int nvme_pr_command(struct block_device *bdev, u32 cdw10, - u64 key, u64 sa_key, u8 op) -{ - struct nvme_command c = { }; - u8 data[16] = { 0, }; - int ret; - - put_unaligned_le64(key, &data[0]); - put_unaligned_le64(sa_key, &data[8]); - - c.common.opcode = op; - c.common.cdw10 = cpu_to_le32(cdw10); - - if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && - bdev->bd_disk->fops == &nvme_ns_head_ops) - ret = nvme_send_ns_head_pr_command(bdev, &c, data); - else - ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c, - data); - if (ret < 0) - return ret; - - return nvme_sc_to_pr_err(ret); -} - -static int nvme_pr_register(struct block_device *bdev, u64 old, - u64 new, unsigned flags) -{ - u32 cdw10; - - if (flags & ~PR_FL_IGNORE_KEY) - return -EOPNOTSUPP; - - cdw10 = old ? 2 : 0; - cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; - cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); -} - -static int nvme_pr_reserve(struct block_device *bdev, u64 key, - enum pr_type type, unsigned flags) -{ - u32 cdw10; - - if (flags & ~PR_FL_IGNORE_KEY) - return -EOPNOTSUPP; - - cdw10 = nvme_pr_type(type) << 8; - cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); -} - -static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, - enum pr_type type, bool abort) -{ - u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); - - return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); -} - -static int nvme_pr_clear(struct block_device *bdev, u64 key) -{ - u32 cdw10 = 1 | (key ? 0 : 1 << 3); - - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); -} - -static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) -{ - u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3); - - return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); -} - -const struct pr_ops nvme_pr_ops = { - .pr_register = nvme_pr_register, - .pr_reserve = nvme_pr_reserve, - .pr_release = nvme_pr_release, - .pr_preempt = nvme_pr_preempt, - .pr_clear = nvme_pr_clear, -}; - #ifdef CONFIG_BLK_SED_OPAL static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send) @@ -2256,7 +2156,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector, #define nvme_report_zones NULL #endif /* CONFIG_BLK_DEV_ZONED */ -static const struct block_device_operations nvme_bdev_ops = { +const struct block_device_operations nvme_bdev_ops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, .compat_ioctl = blkdev_compat_ptr_ioctl, @@ -2791,75 +2691,6 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) return NULL; } -#define SUBSYS_ATTR_RO(_name, _mode, _show) \ - struct device_attribute subsys_attr_##_name = \ - __ATTR(_name, _mode, _show, NULL) - -static ssize_t nvme_subsys_show_nqn(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_subsystem *subsys = - container_of(dev, struct nvme_subsystem, dev); - - return sysfs_emit(buf, "%s\n", subsys->subnqn); -} -static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); - -static ssize_t nvme_subsys_show_type(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_subsystem *subsys = - container_of(dev, struct nvme_subsystem, dev); - - switch (subsys->subtype) { - case NVME_NQN_DISC: - return sysfs_emit(buf, "discovery\n"); - case NVME_NQN_NVME: - return sysfs_emit(buf, "nvm\n"); - default: - return sysfs_emit(buf, "reserved\n"); - } -} -static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type); - -#define nvme_subsys_show_str_function(field) \ -static ssize_t subsys_##field##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct nvme_subsystem *subsys = \ - container_of(dev, struct nvme_subsystem, dev); \ - return sysfs_emit(buf, "%.*s\n", \ - (int)sizeof(subsys->field), subsys->field); \ -} \ -static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); - -nvme_subsys_show_str_function(model); -nvme_subsys_show_str_function(serial); -nvme_subsys_show_str_function(firmware_rev); - -static struct attribute *nvme_subsys_attrs[] = { - &subsys_attr_model.attr, - &subsys_attr_serial.attr, - &subsys_attr_firmware_rev.attr, - &subsys_attr_subsysnqn.attr, - &subsys_attr_subsystype.attr, -#ifdef CONFIG_NVME_MULTIPATH - &subsys_attr_iopolicy.attr, -#endif - NULL, -}; - -static const struct attribute_group nvme_subsys_attrs_group = { - .attrs = nvme_subsys_attrs, -}; - -static const struct attribute_group *nvme_subsys_attrs_groups[] = { - &nvme_subsys_attrs_group, - NULL, -}; - static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) { return ctrl->opts && ctrl->opts->discovery_nqn; @@ -3064,7 +2895,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) ctrl->max_zeroes_sectors = 0; if (ctrl->subsys->subtype != NVME_NQN_NVME || - nvme_ctrl_limited_cns(ctrl)) + nvme_ctrl_limited_cns(ctrl) || + test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags)) return 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -3086,6 +2918,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); free_data: + if (ret > 0) + set_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags); kfree(id); return ret; } @@ -3346,6 +3180,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended) return ret; } + clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags); ctrl->identified = true; return 0; @@ -3393,586 +3228,6 @@ static const struct file_operations nvme_dev_fops = { .uring_cmd = nvme_dev_uring_cmd, }; -static ssize_t nvme_sysfs_reset(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - int ret; - - ret = nvme_reset_ctrl_sync(ctrl); - if (ret < 0) - return ret; - return count; -} -static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); - -static ssize_t nvme_sysfs_rescan(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - nvme_queue_scan(ctrl); - return count; -} -static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); - -static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) -{ - struct gendisk *disk = dev_to_disk(dev); - - if (disk->fops == &nvme_bdev_ops) - return nvme_get_ns_from_dev(dev)->head; - else - return disk->private_data; -} - -static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct nvme_ns_head *head = dev_to_ns_head(dev); - struct nvme_ns_ids *ids = &head->ids; - struct nvme_subsystem *subsys = head->subsys; - int serial_len = sizeof(subsys->serial); - int model_len = sizeof(subsys->model); - - if (!uuid_is_null(&ids->uuid)) - return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid); - - if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return sysfs_emit(buf, "eui.%16phN\n", ids->nguid); - - if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - return sysfs_emit(buf, "eui.%8phN\n", ids->eui64); - - while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || - subsys->serial[serial_len - 1] == '\0')) - serial_len--; - while (model_len > 0 && (subsys->model[model_len - 1] == ' ' || - subsys->model[model_len - 1] == '\0')) - model_len--; - - return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, - serial_len, subsys->serial, model_len, subsys->model, - head->ns_id); -} -static DEVICE_ATTR_RO(wwid); - -static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); -} -static DEVICE_ATTR_RO(nguid); - -static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; - - /* For backward compatibility expose the NGUID to userspace if - * we have no UUID set - */ - if (uuid_is_null(&ids->uuid)) { - dev_warn_ratelimited(dev, - "No UUID available providing old NGUID\n"); - return sysfs_emit(buf, "%pU\n", ids->nguid); - } - return sysfs_emit(buf, "%pU\n", &ids->uuid); -} -static DEVICE_ATTR_RO(uuid); - -static ssize_t eui_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); -} -static DEVICE_ATTR_RO(eui); - -static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id); -} -static DEVICE_ATTR_RO(nsid); - -static struct attribute *nvme_ns_id_attrs[] = { - &dev_attr_wwid.attr, - &dev_attr_uuid.attr, - &dev_attr_nguid.attr, - &dev_attr_eui.attr, - &dev_attr_nsid.attr, -#ifdef CONFIG_NVME_MULTIPATH - &dev_attr_ana_grpid.attr, - &dev_attr_ana_state.attr, -#endif - NULL, -}; - -static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, - struct attribute *a, int n) -{ - struct device *dev = container_of(kobj, struct device, kobj); - struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; - - if (a == &dev_attr_uuid.attr) { - if (uuid_is_null(&ids->uuid) && - !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return 0; - } - if (a == &dev_attr_nguid.attr) { - if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return 0; - } - if (a == &dev_attr_eui.attr) { - if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - return 0; - } -#ifdef CONFIG_NVME_MULTIPATH - if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { - if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ - return 0; - if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) - return 0; - } -#endif - return a->mode; -} - -static const struct attribute_group nvme_ns_id_attr_group = { - .attrs = nvme_ns_id_attrs, - .is_visible = nvme_ns_id_attrs_are_visible, -}; - -const struct attribute_group *nvme_ns_id_attr_groups[] = { - &nvme_ns_id_attr_group, - NULL, -}; - -#define nvme_show_str_function(field) \ -static ssize_t field##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sysfs_emit(buf, "%.*s\n", \ - (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ -} \ -static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); - -nvme_show_str_function(model); -nvme_show_str_function(serial); -nvme_show_str_function(firmware_rev); - -#define nvme_show_int_function(field) \ -static ssize_t field##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sysfs_emit(buf, "%d\n", ctrl->field); \ -} \ -static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); - -nvme_show_int_function(cntlid); -nvme_show_int_function(numa_node); -nvme_show_int_function(queue_count); -nvme_show_int_function(sqsize); -nvme_show_int_function(kato); - -static ssize_t nvme_sysfs_delete(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) - return -EBUSY; - - if (device_remove_file_self(dev, attr)) - nvme_delete_ctrl_sync(ctrl); - return count; -} -static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); - -static ssize_t nvme_sysfs_show_transport(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - return sysfs_emit(buf, "%s\n", ctrl->ops->name); -} -static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); - -static ssize_t nvme_sysfs_show_state(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - static const char *const state_name[] = { - [NVME_CTRL_NEW] = "new", - [NVME_CTRL_LIVE] = "live", - [NVME_CTRL_RESETTING] = "resetting", - [NVME_CTRL_CONNECTING] = "connecting", - [NVME_CTRL_DELETING] = "deleting", - [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)", - [NVME_CTRL_DEAD] = "dead", - }; - - if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && - state_name[ctrl->state]) - return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); - - return sysfs_emit(buf, "unknown state\n"); -} - -static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); - -static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - return sysfs_emit(buf, "%s\n", ctrl->subsys->subnqn); -} -static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); - -static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - return sysfs_emit(buf, "%s\n", ctrl->opts->host->nqn); -} -static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); - -static ssize_t nvme_sysfs_show_hostid(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - return sysfs_emit(buf, "%pU\n", &ctrl->opts->host->id); -} -static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL); - -static ssize_t nvme_sysfs_show_address(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE); -} -static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL); - -static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - - if (ctrl->opts->max_reconnects == -1) - return sysfs_emit(buf, "off\n"); - return sysfs_emit(buf, "%d\n", - opts->max_reconnects * opts->reconnect_delay); -} - -static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - int ctrl_loss_tmo, err; - - err = kstrtoint(buf, 10, &ctrl_loss_tmo); - if (err) - return -EINVAL; - - if (ctrl_loss_tmo < 0) - opts->max_reconnects = -1; - else - opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, - opts->reconnect_delay); - return count; -} -static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR, - nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store); - -static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (ctrl->opts->reconnect_delay == -1) - return sysfs_emit(buf, "off\n"); - return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay); -} - -static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - unsigned int v; - int err; - - err = kstrtou32(buf, 10, &v); - if (err) - return err; - - ctrl->opts->reconnect_delay = v; - return count; -} -static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, - nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store); - -static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (ctrl->opts->fast_io_fail_tmo == -1) - return sysfs_emit(buf, "off\n"); - return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo); -} - -static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - int fast_io_fail_tmo, err; - - err = kstrtoint(buf, 10, &fast_io_fail_tmo); - if (err) - return -EINVAL; - - if (fast_io_fail_tmo < 0) - opts->fast_io_fail_tmo = -1; - else - opts->fast_io_fail_tmo = fast_io_fail_tmo; - return count; -} -static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, - nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); - -static ssize_t cntrltype_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - static const char * const type[] = { - [NVME_CTRL_IO] = "io\n", - [NVME_CTRL_DISC] = "discovery\n", - [NVME_CTRL_ADMIN] = "admin\n", - }; - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype]) - return sysfs_emit(buf, "reserved\n"); - - return sysfs_emit(buf, type[ctrl->cntrltype]); -} -static DEVICE_ATTR_RO(cntrltype); - -static ssize_t dctype_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - static const char * const type[] = { - [NVME_DCTYPE_NOT_REPORTED] = "none\n", - [NVME_DCTYPE_DDC] = "ddc\n", - [NVME_DCTYPE_CDC] = "cdc\n", - }; - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype]) - return sysfs_emit(buf, "reserved\n"); - - return sysfs_emit(buf, type[ctrl->dctype]); -} -static DEVICE_ATTR_RO(dctype); - -#ifdef CONFIG_NVME_AUTH -static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - - if (!opts->dhchap_secret) - return sysfs_emit(buf, "none\n"); - return sysfs_emit(buf, "%s\n", opts->dhchap_secret); -} - -static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - char *dhchap_secret; - - if (!ctrl->opts->dhchap_secret) - return -EINVAL; - if (count < 7) - return -EINVAL; - if (memcmp(buf, "DHHC-1:", 7)) - return -EINVAL; - - dhchap_secret = kzalloc(count + 1, GFP_KERNEL); - if (!dhchap_secret) - return -ENOMEM; - memcpy(dhchap_secret, buf, count); - nvme_auth_stop(ctrl); - if (strcmp(dhchap_secret, opts->dhchap_secret)) { - struct nvme_dhchap_key *key, *host_key; - int ret; - - ret = nvme_auth_generate_key(dhchap_secret, &key); - if (ret) - return ret; - kfree(opts->dhchap_secret); - opts->dhchap_secret = dhchap_secret; - host_key = ctrl->host_key; - mutex_lock(&ctrl->dhchap_auth_mutex); - ctrl->host_key = key; - mutex_unlock(&ctrl->dhchap_auth_mutex); - nvme_auth_free_key(host_key); - } - /* Start re-authentication */ - dev_info(ctrl->device, "re-authenticating controller\n"); - queue_work(nvme_wq, &ctrl->dhchap_auth_work); - - return count; -} -static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR, - nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store); - -static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - - if (!opts->dhchap_ctrl_secret) - return sysfs_emit(buf, "none\n"); - return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret); -} - -static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - struct nvmf_ctrl_options *opts = ctrl->opts; - char *dhchap_secret; - - if (!ctrl->opts->dhchap_ctrl_secret) - return -EINVAL; - if (count < 7) - return -EINVAL; - if (memcmp(buf, "DHHC-1:", 7)) - return -EINVAL; - - dhchap_secret = kzalloc(count + 1, GFP_KERNEL); - if (!dhchap_secret) - return -ENOMEM; - memcpy(dhchap_secret, buf, count); - nvme_auth_stop(ctrl); - if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) { - struct nvme_dhchap_key *key, *ctrl_key; - int ret; - - ret = nvme_auth_generate_key(dhchap_secret, &key); - if (ret) - return ret; - kfree(opts->dhchap_ctrl_secret); - opts->dhchap_ctrl_secret = dhchap_secret; - ctrl_key = ctrl->ctrl_key; - mutex_lock(&ctrl->dhchap_auth_mutex); - ctrl->ctrl_key = key; - mutex_unlock(&ctrl->dhchap_auth_mutex); - nvme_auth_free_key(ctrl_key); - } - /* Start re-authentication */ - dev_info(ctrl->device, "re-authenticating controller\n"); - queue_work(nvme_wq, &ctrl->dhchap_auth_work); - - return count; -} -static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR, - nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store); -#endif - -static struct attribute *nvme_dev_attrs[] = { - &dev_attr_reset_controller.attr, - &dev_attr_rescan_controller.attr, - &dev_attr_model.attr, - &dev_attr_serial.attr, - &dev_attr_firmware_rev.attr, - &dev_attr_cntlid.attr, - &dev_attr_delete_controller.attr, - &dev_attr_transport.attr, - &dev_attr_subsysnqn.attr, - &dev_attr_address.attr, - &dev_attr_state.attr, - &dev_attr_numa_node.attr, - &dev_attr_queue_count.attr, - &dev_attr_sqsize.attr, - &dev_attr_hostnqn.attr, - &dev_attr_hostid.attr, - &dev_attr_ctrl_loss_tmo.attr, - &dev_attr_reconnect_delay.attr, - &dev_attr_fast_io_fail_tmo.attr, - &dev_attr_kato.attr, - &dev_attr_cntrltype.attr, - &dev_attr_dctype.attr, -#ifdef CONFIG_NVME_AUTH - &dev_attr_dhchap_secret.attr, - &dev_attr_dhchap_ctrl_secret.attr, -#endif - NULL -}; - -static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, - struct attribute *a, int n) -{ - struct device *dev = container_of(kobj, struct device, kobj); - struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - - if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl) - return 0; - if (a == &dev_attr_address.attr && !ctrl->ops->get_address) - return 0; - if (a == &dev_attr_hostnqn.attr && !ctrl->opts) - return 0; - if (a == &dev_attr_hostid.attr && !ctrl->opts) - return 0; - if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts) - return 0; - if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts) - return 0; - if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts) - return 0; -#ifdef CONFIG_NVME_AUTH - if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts) - return 0; - if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts) - return 0; -#endif - - return a->mode; -} - -const struct attribute_group nvme_dev_attrs_group = { - .attrs = nvme_dev_attrs, - .is_visible = nvme_dev_attrs_are_visible, -}; -EXPORT_SYMBOL_GPL(nvme_dev_attrs_group); - -static const struct attribute_group *nvme_dev_attr_groups[] = { - &nvme_dev_attrs_group, - NULL, -}; - static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { @@ -4176,10 +3431,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids); if (ret) { - dev_err(ctrl->device, - "globally duplicate IDs for nsid %d\n", info->nsid); + /* + * We've found two different namespaces on two different + * subsystems that report the same ID. This is pretty nasty + * for anything that actually requires unique device + * identification. In the kernel we need this for multipathing, + * and in user space the /dev/disk/by-id/ links rely on it. + * + * If the device also claims to be multi-path capable back off + * here now and refuse the probe the second device as this is a + * recipe for data corruption. If not this is probably a + * cheap consumer device if on the PCIe bus, so let the user + * proceed and use the shiny toy, but warn that with changing + * probing order (which due to our async probing could just be + * device taking longer to startup) the other device could show + * up at any time. + */ nvme_print_device_info(ctrl); - return ret; + if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */ + ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) && + info->is_shared)) { + dev_err(ctrl->device, + "ignoring nsid %d because of duplicate IDs\n", + info->nsid); + return ret; + } + + dev_err(ctrl->device, + "clearing duplicate IDs for nsid %d\n", info->nsid); + dev_err(ctrl->device, + "use of /dev/disk/by-id/ may cause data corruption\n"); + memset(&info->ids.nguid, 0, sizeof(info->ids.nguid)); + memset(&info->ids.uuid, 0, sizeof(info->ids.uuid)); + memset(&info->ids.eui64, 0, sizeof(info->ids.eui64)); + ctrl->quirks |= NVME_QUIRK_BOGUS_NID; } mutex_lock(&ctrl->subsys->lock); @@ -4212,7 +3497,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) goto out_put_ns_head; } - if (!multipath && !list_empty(&head->list)) { + if (!multipath) { dev_warn(ctrl->device, "Found shared namespace %d, but multipathing not supported.\n", info->nsid); @@ -4313,7 +3598,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) * instance as shared namespaces will show up as multiple block * devices. */ - if (ns->head->disk) { + if (nvme_ns_head_multipath(ns->head)) { sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, ctrl->instance, ns->head->instance); disk->flags |= GENHD_FL_HIDDEN; @@ -5199,6 +4484,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, return 0; out_free_cdev: + nvme_fault_inject_fini(&ctrl->fault_inject); + dev_pm_qos_hide_latency_tolerance(ctrl->device); cdev_device_del(&ctrl->cdev, ctrl->device); out_free_name: nvme_put_ctrl(ctrl); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 0069ebff85df..8175d49f2909 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -21,35 +21,60 @@ static DEFINE_MUTEX(nvmf_hosts_mutex); static struct nvmf_host *nvmf_default_host; -static struct nvmf_host *__nvmf_host_find(const char *hostnqn) +static struct nvmf_host *nvmf_host_alloc(const char *hostnqn, uuid_t *id) { struct nvmf_host *host; - list_for_each_entry(host, &nvmf_hosts, list) { - if (!strcmp(host->nqn, hostnqn)) - return host; - } + host = kmalloc(sizeof(*host), GFP_KERNEL); + if (!host) + return NULL; - return NULL; + kref_init(&host->ref); + uuid_copy(&host->id, id); + strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + + return host; } -static struct nvmf_host *nvmf_host_add(const char *hostnqn) +static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t *id) { struct nvmf_host *host; mutex_lock(&nvmf_hosts_mutex); - host = __nvmf_host_find(hostnqn); - if (host) { - kref_get(&host->ref); - goto out_unlock; + + /* + * We have defined a host as how it is perceived by the target. + * Therefore, we don't allow different Host NQNs with the same Host ID. + * Similarly, we do not allow the usage of the same Host NQN with + * different Host IDs. This'll maintain unambiguous host identification. + */ + list_for_each_entry(host, &nvmf_hosts, list) { + bool same_hostnqn = !strcmp(host->nqn, hostnqn); + bool same_hostid = uuid_equal(&host->id, id); + + if (same_hostnqn && same_hostid) { + kref_get(&host->ref); + goto out_unlock; + } + if (same_hostnqn) { + pr_err("found same hostnqn %s but different hostid %pUb\n", + hostnqn, id); + host = ERR_PTR(-EINVAL); + goto out_unlock; + } + if (same_hostid) { + pr_err("found same hostid %pUb but different hostnqn %s\n", + id, hostnqn); + host = ERR_PTR(-EINVAL); + goto out_unlock; + } } - host = kmalloc(sizeof(*host), GFP_KERNEL); - if (!host) + host = nvmf_host_alloc(hostnqn, id); + if (!host) { + host = ERR_PTR(-ENOMEM); goto out_unlock; - - kref_init(&host->ref); - strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE); + } list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -60,16 +85,17 @@ out_unlock: static struct nvmf_host *nvmf_host_default(void) { struct nvmf_host *host; + char nqn[NVMF_NQN_SIZE]; + uuid_t id; - host = kmalloc(sizeof(*host), GFP_KERNEL); + uuid_gen(&id); + snprintf(nqn, NVMF_NQN_SIZE, + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &id); + + host = nvmf_host_alloc(nqn, &id); if (!host) return NULL; - kref_init(&host->ref); - uuid_gen(&host->id); - snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); - mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); mutex_unlock(&nvmf_hosts_mutex); @@ -349,6 +375,45 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, } } +static struct nvmf_connect_data *nvmf_connect_data_prep(struct nvme_ctrl *ctrl, + u16 cntlid) +{ + struct nvmf_connect_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + uuid_copy(&data->hostid, &ctrl->opts->host->id); + data->cntlid = cpu_to_le16(cntlid); + strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); + strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); + + return data; +} + +static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid, + struct nvme_command *cmd) +{ + cmd->connect.opcode = nvme_fabrics_command; + cmd->connect.fctype = nvme_fabrics_type_connect; + cmd->connect.qid = cpu_to_le16(qid); + + if (qid) { + cmd->connect.sqsize = cpu_to_le16(ctrl->sqsize); + } else { + cmd->connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); + + /* + * set keep-alive timeout in seconds granularity (ms * 1000) + */ + cmd->connect.kato = cpu_to_le32(ctrl->kato * 1000); + } + + if (ctrl->opts->disable_sqflow) + cmd->connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; +} + /** * nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect" * API function. @@ -377,28 +442,12 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) int ret; u32 result; - cmd.connect.opcode = nvme_fabrics_command; - cmd.connect.fctype = nvme_fabrics_type_connect; - cmd.connect.qid = 0; - cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); - - /* - * Set keep-alive timeout in seconds granularity (ms * 1000) - */ - cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000); - - if (ctrl->opts->disable_sqflow) - cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; + nvmf_connect_cmd_prep(ctrl, 0, &cmd); - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = nvmf_connect_data_prep(ctrl, 0xffff); if (!data) return -ENOMEM; - uuid_copy(&data->hostid, &ctrl->opts->host->id); - data->cntlid = cpu_to_le16(0xffff); - strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); - strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, data, sizeof(*data), NVME_QID_ANY, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); @@ -468,23 +517,12 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) int ret; u32 result; - cmd.connect.opcode = nvme_fabrics_command; - cmd.connect.fctype = nvme_fabrics_type_connect; - cmd.connect.qid = cpu_to_le16(qid); - cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize); + nvmf_connect_cmd_prep(ctrl, qid, &cmd); - if (ctrl->opts->disable_sqflow) - cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; - - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = nvmf_connect_data_prep(ctrl, ctrl->cntlid); if (!data) return -ENOMEM; - uuid_copy(&data->hostid, &ctrl->opts->host->id); - data->cntlid = cpu_to_le16(ctrl->cntlid); - strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); - strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), qid, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); @@ -621,6 +659,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, size_t nqnlen = 0; int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; uuid_t hostid; + char hostnqn[NVMF_NQN_SIZE]; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; @@ -637,7 +676,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (!options) return -ENOMEM; - uuid_gen(&hostid); + /* use default host if not given by user space */ + uuid_copy(&hostid, &nvmf_default_host->id); + strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE); while ((p = strsep(&o, ",\n")) != NULL) { if (!*p) @@ -783,12 +824,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } - opts->host = nvmf_host_add(p); + strscpy(hostnqn, p, NVMF_NQN_SIZE); kfree(p); - if (!opts->host) { - ret = -ENOMEM; - goto out; - } break; case NVMF_OPT_RECONNECT_DELAY: if (match_int(args, &token)) { @@ -945,18 +982,94 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->fast_io_fail_tmo, ctrl_loss_tmo); } - if (!opts->host) { - kref_get(&nvmf_default_host->ref); - opts->host = nvmf_default_host; + opts->host = nvmf_host_add(hostnqn, &hostid); + if (IS_ERR(opts->host)) { + ret = PTR_ERR(opts->host); + opts->host = NULL; + goto out; } - uuid_copy(&opts->host->id, &hostid); - out: kfree(options); return ret; } +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues, + u32 io_queues[HCTX_MAX_TYPES]) +{ + if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) { + /* + * separate read/write queues + * hand out dedicated default queues only after we have + * sufficient read queues. + */ + io_queues[HCTX_TYPE_READ] = opts->nr_io_queues; + nr_io_queues -= io_queues[HCTX_TYPE_READ]; + io_queues[HCTX_TYPE_DEFAULT] = + min(opts->nr_write_queues, nr_io_queues); + nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT]; + } else { + /* + * shared read/write queues + * either no write queues were requested, or we don't have + * sufficient queue count to have dedicated default queues. + */ + io_queues[HCTX_TYPE_DEFAULT] = + min(opts->nr_io_queues, nr_io_queues); + nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT]; + } + + if (opts->nr_poll_queues && nr_io_queues) { + /* map dedicated poll queues only if we have queues left */ + io_queues[HCTX_TYPE_POLL] = + min(opts->nr_poll_queues, nr_io_queues); + } +} +EXPORT_SYMBOL_GPL(nvmf_set_io_queues); + +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl, + u32 io_queues[HCTX_MAX_TYPES]) +{ + struct nvmf_ctrl_options *opts = ctrl->opts; + + if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) { + /* separate read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + io_queues[HCTX_TYPE_DEFAULT]; + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = + io_queues[HCTX_TYPE_READ]; + set->map[HCTX_TYPE_READ].queue_offset = + io_queues[HCTX_TYPE_DEFAULT]; + } else { + /* shared read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = + io_queues[HCTX_TYPE_DEFAULT]; + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = + io_queues[HCTX_TYPE_DEFAULT]; + set->map[HCTX_TYPE_READ].queue_offset = 0; + } + + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); + if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) { + /* map dedicated poll queues only if we have queues left */ + set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL]; + set->map[HCTX_TYPE_POLL].queue_offset = + io_queues[HCTX_TYPE_DEFAULT] + + io_queues[HCTX_TYPE_READ]; + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); + } + + dev_info(ctrl->device, + "mapped %d/%d/%d default/read/poll queues.\n", + io_queues[HCTX_TYPE_DEFAULT], + io_queues[HCTX_TYPE_READ], + io_queues[HCTX_TYPE_POLL]); +} +EXPORT_SYMBOL_GPL(nvmf_map_queues); + static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, unsigned int required_opts) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index dcac3df8a5f7..82e7a27ffbde 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -77,6 +77,9 @@ enum { * with the parsing opts enum. * @mask: Used by the fabrics library to parse through sysfs options * on adding a NVMe controller. + * @max_reconnects: maximum number of allowed reconnect attempts before removing + * the controller, (-1) means reconnect forever, zero means remove + * immediately; * @transport: Holds the fabric transport "technology name" (for a lack of * better description) that will be used by an NVMe controller * being added. @@ -96,9 +99,6 @@ enum { * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. * @kato: Keep-alive timeout. * @host: Virtual NVMe host, contains the NQN and Host ID. - * @max_reconnects: maximum number of allowed reconnect attempts before removing - * the controller, (-1) means reconnect forever, zero means remove - * immediately; * @dhchap_secret: DH-HMAC-CHAP secret * @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional * authentication @@ -112,6 +112,7 @@ enum { */ struct nvmf_ctrl_options { unsigned mask; + int max_reconnects; char *transport; char *subsysnqn; char *traddr; @@ -125,7 +126,6 @@ struct nvmf_ctrl_options { bool duplicate_connect; unsigned int kato; struct nvmf_host *host; - int max_reconnects; char *dhchap_secret; char *dhchap_ctrl_secret; bool disable_sqflow; @@ -181,7 +181,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, ctrl->state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || - memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t))) + !uuid_equal(&opts->host->id, &ctrl->opts->host->id)) return false; return true; @@ -203,6 +203,13 @@ static inline void nvmf_complete_timed_out_request(struct request *rq) } } +static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts) +{ + return min(opts->nr_io_queues, num_online_cpus()) + + min(opts->nr_write_queues, num_online_cpus()) + + min(opts->nr_poll_queues, num_online_cpus()); +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); @@ -215,5 +222,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts); +void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues, + u32 io_queues[HCTX_MAX_TYPES]); +void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl, + u32 io_queues[HCTX_MAX_TYPES]); #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c index 83d2e6860d38..1ba10a5c656d 100644 --- a/drivers/nvme/host/fault_inject.c +++ b/drivers/nvme/host/fault_inject.c @@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, /* create debugfs directory and attribute */ parent = debugfs_create_dir(dev_name, NULL); - if (!parent) { + if (IS_ERR(parent)) { pr_warn("%s: failed to create debugfs directory\n", dev_name); return; } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 2ed75923507d..1cd2bf82319a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2548,14 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - __nvme_fc_abort_outstanding_ios(ctrl, true); + enum nvme_ctrl_state state; + unsigned long flags; + + spin_lock_irqsave(&ctrl->lock, flags); + state = ctrl->ctrl.state; + if (state == NVME_CTRL_CONNECTING) { set_bit(ASSOC_FAILED, &ctrl->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); + __nvme_fc_abort_outstanding_ios(ctrl, true); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport error during (re)connect\n", + ctrl->cnum); return; } + spin_unlock_irqrestore(&ctrl->lock, flags); /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -2917,8 +2927,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, &nvme_fc_mq_ops, 1, - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz)); + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; @@ -3110,7 +3120,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ ret = nvme_enable_ctrl(&ctrl->ctrl); - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) goto out_disconnect_admin_queue; ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; @@ -3120,7 +3132,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) goto out_disconnect_admin_queue; /* sanity checks */ @@ -3165,10 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) - goto out_term_aen_ops; + spin_lock_irqsave(&ctrl->lock, flags); + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) { + spin_unlock_irqrestore(&ctrl->lock, flags); + goto out_term_aen_ops; + } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + spin_unlock_irqrestore(&ctrl->lock, flags); ctrl->ctrl.nr_reconnects = 0; @@ -3180,6 +3200,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); out_disconnect_admin_queue: + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", + ctrl->cnum, ctrl->association_id, ret); /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); spin_lock_irqsave(&ctrl->lock, flags); @@ -3536,8 +3559,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, &nvme_fc_admin_mq_ops, - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz)); + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) goto fail_ctrl; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 81c5c9e38477..5c3250f36ce7 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -14,7 +14,7 @@ enum { }; static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, - unsigned int flags, fmode_t mode) + unsigned int flags, bool open_for_write) { u32 effects; @@ -80,7 +80,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, * writing. */ if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) - return mode & FMODE_WRITE; + return open_for_write; return true; } @@ -254,7 +254,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, blk_mq_free_request(req); if (effects) - nvme_passthru_end(ctrl, effects, cmd, ret); + nvme_passthru_end(ctrl, ns, effects, cmd, ret); return ret; } @@ -337,7 +337,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd __user *ucmd, unsigned int flags, - fmode_t mode) + bool open_for_write) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -365,7 +365,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, 0, mode)) + if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) return -EACCES; if (cmd.timeout_ms) @@ -385,7 +385,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, - fmode_t mode) + bool open_for_write) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; @@ -412,7 +412,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, flags, mode)) + if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) return -EACCES; if (cmd.timeout_ms) @@ -505,7 +505,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, { struct io_uring_cmd *ioucmd = req->end_io_data; struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - void *cookie = READ_ONCE(ioucmd->cookie); req->bio = pdu->bio; if (nvme_req(req)->flags & NVME_REQ_CANCELLED) @@ -518,10 +517,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, * For iopoll, complete it directly. * Otherwise, move the completion to task work. */ - if (cookie != NULL && blk_rq_is_poll(req)) + if (blk_rq_is_poll(req)) { + WRITE_ONCE(ioucmd->cookie, NULL); nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED); - else - io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); + } else { + io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); + } return RQ_END_IO_FREE; } @@ -531,7 +532,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, { struct io_uring_cmd *ioucmd = req->end_io_data; struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); - void *cookie = READ_ONCE(ioucmd->cookie); req->bio = pdu->bio; pdu->req = req; @@ -540,10 +540,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, * For iopoll, complete it directly. * Otherwise, move the completion to task work. */ - if (cookie != NULL && blk_rq_is_poll(req)) + if (blk_rq_is_poll(req)) { + WRITE_ONCE(ioucmd->cookie, NULL); nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED); - else - io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); + } else { + io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb); + } return RQ_END_IO_NONE; } @@ -583,7 +585,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); - if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode)) + if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) return -EACCES; d.metadata = READ_ONCE(cmd->metadata); @@ -599,7 +601,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (issue_flags & IO_URING_F_IOPOLL) rq_flags |= REQ_POLLED; -retry: req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags); if (IS_ERR(req)) return PTR_ERR(req); @@ -613,17 +614,11 @@ retry: return ret; } - if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) { - if (unlikely(!req->bio)) { - /* we can't poll this, so alloc regular req instead */ - blk_mq_free_request(req); - rq_flags &= ~REQ_POLLED; - goto retry; - } else { - WRITE_ONCE(ioucmd->cookie, req->bio); - req->bio->bi_opf |= REQ_POLLED; - } + if (blk_rq_is_poll(req)) { + ioucmd->flags |= IORING_URING_CMD_POLLED; + WRITE_ONCE(ioucmd->cookie, req); } + /* to free bio on completion, as req->bio will be null at that time */ pdu->bio = req->bio; pdu->meta_len = d.metadata_len; @@ -649,13 +644,13 @@ static bool is_ctrl_ioctl(unsigned int cmd) } static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, - void __user *argp, fmode_t mode) + void __user *argp, bool open_for_write) { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, 0, mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, 0, mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -680,14 +675,14 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, unsigned int flags, fmode_t mode) + void __user *argp, unsigned int flags, bool open_for_write) { switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode); + return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -702,16 +697,18 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, flags |= NVME_IOCTL_VEC; fallthrough; case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode); + return nvme_user_cmd64(ns->ctrl, ns, argp, flags, + open_for_write); default: return -ENOTTY; } } -int nvme_ioctl(struct block_device *bdev, fmode_t mode, +int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = bdev->bd_disk->private_data; + bool open_for_write = mode & BLK_OPEN_WRITE; void __user *argp = (void __user *)arg; unsigned int flags = 0; @@ -719,19 +716,20 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode, flags |= NVME_IOCTL_PARTITION; if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); - return nvme_ns_ioctl(ns, cmd, argp, flags, mode); + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); + return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); + bool open_for_write = file->f_mode & FMODE_WRITE; void __user *argp = (void __user *)arg; if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode); - return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); + return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); } static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -782,25 +780,23 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, struct io_comp_batch *iob, unsigned int poll_flags) { - struct bio *bio; + struct request *req; int ret = 0; - struct nvme_ns *ns; - struct request_queue *q; + + if (!(ioucmd->flags & IORING_URING_CMD_POLLED)) + return 0; rcu_read_lock(); - bio = READ_ONCE(ioucmd->cookie); - ns = container_of(file_inode(ioucmd->file)->i_cdev, - struct nvme_ns, cdev); - q = ns->queue; - if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev) - ret = bio_poll(bio, iob, poll_flags); + req = READ_ONCE(ioucmd->cookie); + if (req && blk_rq_is_poll(req)) + ret = blk_rq_poll(req, iob, poll_flags); rcu_read_unlock(); return ret; } #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp, struct nvme_ns_head *head, int srcu_idx, - fmode_t mode) + bool open_for_write) __releases(&head->srcu) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -808,16 +804,17 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, nvme_get_ctrl(ns->ctrl); srcu_read_unlock(&head->srcu, srcu_idx); - ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); nvme_put_ctrl(ctrl); return ret; } -int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, +int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns_head *head = bdev->bd_disk->private_data; + bool open_for_write = mode & BLK_OPEN_WRITE; void __user *argp = (void __user *)arg; struct nvme_ns *ns; int srcu_idx, ret = -EWOULDBLOCK; @@ -838,9 +835,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, */ if (is_ctrl_ioctl(cmd)) return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, - mode); + open_for_write); - ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode); + ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -849,6 +846,7 @@ out_unlock: long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + bool open_for_write = file->f_mode & FMODE_WRITE; struct cdev *cdev = file_inode(file)->i_cdev; struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); @@ -863,9 +861,9 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, if (is_ctrl_ioctl(cmd)) return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, - file->f_mode); + open_for_write); - ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); + ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -885,31 +883,6 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, srcu_read_unlock(&head->srcu, srcu_idx); return ret; } - -int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, - struct io_comp_batch *iob, - unsigned int poll_flags) -{ - struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; - struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); - int srcu_idx = srcu_read_lock(&head->srcu); - struct nvme_ns *ns = nvme_find_path(head); - struct bio *bio; - int ret = 0; - struct request_queue *q; - - if (ns) { - rcu_read_lock(); - bio = READ_ONCE(ioucmd->cookie); - q = ns->queue; - if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio - && bio->bi_bdev) - ret = bio_poll(bio, iob, poll_flags); - rcu_read_unlock(); - } - srcu_read_unlock(&head->srcu, srcu_idx); - return ret; -} #endif /* CONFIG_NVME_MULTIPATH */ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) @@ -940,7 +913,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) } static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, - fmode_t mode) + bool open_for_write) { struct nvme_ns *ns; int ret; @@ -964,7 +937,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem); - ret = nvme_user_cmd(ctrl, ns, argp, 0, mode); + ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); nvme_put_ns(ns); return ret; @@ -976,16 +949,17 @@ out_unlock: long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + bool open_for_write = file->f_mode & FMODE_WRITE; struct nvme_ctrl *ctrl = file->private_data; void __user *argp = (void __user *)arg; switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); case NVME_IOCTL_IO_CMD: - return nvme_dev_user_cmd(ctrl, argp, file->f_mode); + return nvme_dev_user_cmd(ctrl, argp, open_for_write); case NVME_IOCTL_RESET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2bc159a318ff..0a88d7bdc5e3 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -106,6 +106,14 @@ void nvme_failover_req(struct request *req) bio->bi_opf &= ~REQ_POLLED; bio->bi_cookie = BLK_QC_T_NONE; } + /* + * The alternate request queue that we may end up submitting + * the bio to may be frozen temporarily, in this case REQ_NOWAIT + * will fail the I/O immediately with EAGAIN to the issuer. + * We are not in the issuer context which cannot block. Clear + * the flag to avoid spurious EAGAIN I/O failures. + */ + bio->bi_opf &= ~REQ_NOWAIT; } blk_steal_bios(&ns->head->requeue_list, req); spin_unlock_irqrestore(&ns->head->requeue_lock, flags); @@ -402,14 +410,14 @@ static void nvme_ns_head_submit_bio(struct bio *bio) srcu_read_unlock(&head->srcu, srcu_idx); } -static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) +static int nvme_ns_head_open(struct gendisk *disk, blk_mode_t mode) { - if (!nvme_tryget_ns_head(bdev->bd_disk->private_data)) + if (!nvme_tryget_ns_head(disk->private_data)) return -ENXIO; return 0; } -static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode) +static void nvme_ns_head_release(struct gendisk *disk) { nvme_put_ns_head(disk->private_data); } @@ -470,7 +478,7 @@ static const struct file_operations nvme_ns_head_chr_fops = { .unlocked_ioctl = nvme_ns_head_chr_ioctl, .compat_ioctl = compat_ptr_ioctl, .uring_cmd = nvme_ns_head_chr_uring_cmd, - .uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll, + .uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll, }; static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2d4f59e0535..f35647c470af 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -19,6 +19,8 @@ #include <trace/events/block.h> +extern const struct pr_ops nvme_pr_ops; + extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -247,12 +249,14 @@ enum nvme_ctrl_flags { NVME_CTRL_ADMIN_Q_STOPPED = 1, NVME_CTRL_STARTED_ONCE = 2, NVME_CTRL_STOPPED = 3, + NVME_CTRL_SKIP_ID_CNS_CS = 4, + NVME_CTRL_DIRTY_CAPABILITY = 5, }; struct nvme_ctrl { bool comp_seen; - enum nvme_ctrl_state state; bool identified; + enum nvme_ctrl_state state; spinlock_t lock; struct mutex scan_lock; const struct nvme_ctrl_ops *ops; @@ -284,8 +288,8 @@ struct nvme_ctrl { char name[12]; u16 cntlid; - u32 ctrl_config; u16 mtfa; + u32 ctrl_config; u32 queue_count; u64 cap; @@ -328,6 +332,7 @@ struct nvme_ctrl { struct delayed_work ka_work; struct delayed_work failfast_work; struct nvme_command ka_cmd; + unsigned long ka_last_check_time; struct work_struct fw_act_work; unsigned long events; @@ -358,10 +363,10 @@ struct nvme_ctrl { bool apst_enabled; /* PCIe only: */ + u16 hmmaxd; u32 hmpre; u32 hmmin; u32 hmminds; - u16 hmmaxd; /* Fabrics only */ u32 ioccsz; @@ -841,10 +846,10 @@ void nvme_put_ns_head(struct nvme_ns_head *head); int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, const struct file_operations *fops, struct module *owner); void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device); -int nvme_ioctl(struct block_device *bdev, fmode_t mode, +int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, +int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -852,8 +857,6 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, struct io_comp_batch *iob, unsigned int poll_flags); -int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, - struct io_comp_batch *iob, unsigned int poll_flags); int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, @@ -865,7 +868,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; +extern const struct attribute_group *nvme_subsys_attrs_groups[]; +extern const struct attribute_group *nvme_dev_attr_groups[]; +extern const struct block_device_operations nvme_bdev_ops; +void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); #ifdef CONFIG_NVME_MULTIPATH static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl) @@ -1077,7 +1084,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); int nvme_execute_rq(struct request *rq, bool at_head); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 32244582fdb0..baf69af7ea78 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -420,10 +420,9 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_dev *dev = to_nvme_dev(set->driver_data); struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - nvme_req(req)->ctrl = &dev->ctrl; + nvme_req(req)->ctrl = set->driver_data; nvme_req(req)->cmd = &iod->cmd; return 0; } @@ -968,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); dma_unmap_page(dev->dev, iod->meta_dma, - rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); } if (blk_rq_nr_phys_segments(req)) @@ -1299,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) */ if (nvme_should_reset(dev, csts)) { nvme_warn_reset(dev, csts); - nvme_dev_disable(dev, false); - nvme_reset_ctrl(&dev->ctrl); - return BLK_EH_DONE; + goto disable; } /* @@ -1352,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; - nvme_dev_disable(dev, false); - nvme_reset_ctrl(&dev->ctrl); - - return BLK_EH_DONE; + goto disable; } if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { @@ -1392,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * as the device then is in a faulty state. */ return BLK_EH_RESET_TIMER; + +disable: + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) + return BLK_EH_DONE; + + nvme_dev_disable(dev, false); + if (nvme_try_sched_reset(&dev->ctrl)) + nvme_unquiesce_io_queues(&dev->ctrl); + return BLK_EH_DONE; } static void nvme_free_queue(struct nvme_queue *nvmeq) @@ -2691,7 +2694,8 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.state != NVME_CTRL_RESETTING) { dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", dev->ctrl.state); - return; + result = -ENODEV; + goto out; } /* @@ -2778,7 +2782,9 @@ static void nvme_reset_work(struct work_struct *work) result); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); nvme_dev_disable(dev, true); + nvme_sync_queues(&dev->ctrl); nvme_mark_namespaces_dead(&dev->ctrl); + nvme_unquiesce_io_queues(&dev->ctrl); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD); } @@ -3276,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, case pci_channel_io_frozen: dev_warn(dev->ctrl.device, "frozen state error detected, reset controller\n"); + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) { + nvme_dev_disable(dev, true); + return PCI_ERS_RESULT_DISCONNECT; + } nvme_dev_disable(dev, false); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: @@ -3292,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - nvme_reset_ctrl(&dev->ctrl); + if (!nvme_try_sched_reset(&dev->ctrl)) + nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } @@ -3394,6 +3405,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */ @@ -3424,6 +3437,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1602), /* MAXIO MAP1602 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */ diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c new file mode 100644 index 000000000000..391b1465ebfd --- /dev/null +++ b/drivers/nvme/host/pr.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2015 Intel Corporation + * Keith Busch <kbusch@kernel.org> + */ +#include <linux/blkdev.h> +#include <linux/pr.h> +#include <asm/unaligned.h> + +#include "nvme.h" + +static enum nvme_pr_type nvme_pr_type_from_blk(enum pr_type type) +{ + switch (type) { + case PR_WRITE_EXCLUSIVE: + return NVME_PR_WRITE_EXCLUSIVE; + case PR_EXCLUSIVE_ACCESS: + return NVME_PR_EXCLUSIVE_ACCESS; + case PR_WRITE_EXCLUSIVE_REG_ONLY: + return NVME_PR_WRITE_EXCLUSIVE_REG_ONLY; + case PR_EXCLUSIVE_ACCESS_REG_ONLY: + return NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY; + case PR_WRITE_EXCLUSIVE_ALL_REGS: + return NVME_PR_WRITE_EXCLUSIVE_ALL_REGS; + case PR_EXCLUSIVE_ACCESS_ALL_REGS: + return NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS; + } + + return 0; +} + +static enum pr_type block_pr_type_from_nvme(enum nvme_pr_type type) +{ + switch (type) { + case NVME_PR_WRITE_EXCLUSIVE: + return PR_WRITE_EXCLUSIVE; + case NVME_PR_EXCLUSIVE_ACCESS: + return PR_EXCLUSIVE_ACCESS; + case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY: + return PR_WRITE_EXCLUSIVE_REG_ONLY; + case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY: + return PR_EXCLUSIVE_ACCESS_REG_ONLY; + case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS: + return PR_WRITE_EXCLUSIVE_ALL_REGS; + case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS: + return PR_EXCLUSIVE_ACCESS_ALL_REGS; + } + + return 0; +} + +static int nvme_send_ns_head_pr_command(struct block_device *bdev, + struct nvme_command *c, void *data, unsigned int data_len) +{ + struct nvme_ns_head *head = bdev->bd_disk->private_data; + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + int ret = -EWOULDBLOCK; + + if (ns) { + c->common.nsid = cpu_to_le32(ns->head->ns_id); + ret = nvme_submit_sync_cmd(ns->queue, c, data, data_len); + } + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} + +static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c, + void *data, unsigned int data_len) +{ + c->common.nsid = cpu_to_le32(ns->head->ns_id); + return nvme_submit_sync_cmd(ns->queue, c, data, data_len); +} + +static int nvme_sc_to_pr_err(int nvme_sc) +{ + if (nvme_is_path_error(nvme_sc)) + return PR_STS_PATH_FAILED; + + switch (nvme_sc) { + case NVME_SC_SUCCESS: + return PR_STS_SUCCESS; + case NVME_SC_RESERVATION_CONFLICT: + return PR_STS_RESERVATION_CONFLICT; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_BAD_ATTRIBUTES: + case NVME_SC_INVALID_OPCODE: + case NVME_SC_INVALID_FIELD: + case NVME_SC_INVALID_NS: + return -EINVAL; + default: + return PR_STS_IOERR; + } +} + +static int nvme_send_pr_command(struct block_device *bdev, + struct nvme_command *c, void *data, unsigned int data_len) +{ + if (IS_ENABLED(CONFIG_NVME_MULTIPATH) && + bdev->bd_disk->fops == &nvme_ns_head_ops) + return nvme_send_ns_head_pr_command(bdev, c, data, data_len); + + return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data, + data_len); +} + +static int nvme_pr_command(struct block_device *bdev, u32 cdw10, + u64 key, u64 sa_key, u8 op) +{ + struct nvme_command c = { }; + u8 data[16] = { 0, }; + int ret; + + put_unaligned_le64(key, &data[0]); + put_unaligned_le64(sa_key, &data[8]); + + c.common.opcode = op; + c.common.cdw10 = cpu_to_le32(cdw10); + + ret = nvme_send_pr_command(bdev, &c, data, sizeof(data)); + if (ret < 0) + return ret; + + return nvme_sc_to_pr_err(ret); +} + +static int nvme_pr_register(struct block_device *bdev, u64 old, + u64 new, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = old ? 2 : 0; + cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; + cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); +} + +static int nvme_pr_reserve(struct block_device *bdev, u64 key, + enum pr_type type, unsigned flags) +{ + u32 cdw10; + + if (flags & ~PR_FL_IGNORE_KEY) + return -EOPNOTSUPP; + + cdw10 = nvme_pr_type_from_blk(type) << 8; + cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); +} + +static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, + enum pr_type type, bool abort) +{ + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1); + + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); +} + +static int nvme_pr_clear(struct block_device *bdev, u64 key) +{ + u32 cdw10 = 1 | (key ? 0 : 1 << 3); + + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); +} + +static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) +{ + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3); + + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); +} + +static int nvme_pr_resv_report(struct block_device *bdev, void *data, + u32 data_len, bool *eds) +{ + struct nvme_command c = { }; + int ret; + + c.common.opcode = nvme_cmd_resv_report; + c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len)); + c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT); + *eds = true; + +retry: + ret = nvme_send_pr_command(bdev, &c, data, data_len); + if (ret == NVME_SC_HOST_ID_INCONSIST && + c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) { + c.common.cdw11 = 0; + *eds = false; + goto retry; + } + + if (ret < 0) + return ret; + + return nvme_sc_to_pr_err(ret); +} + +static int nvme_pr_read_keys(struct block_device *bdev, + struct pr_keys *keys_info) +{ + u32 rse_len, num_keys = keys_info->num_keys; + struct nvme_reservation_status_ext *rse; + int ret, i; + bool eds; + + /* + * Assume we are using 128-bit host IDs and allocate a buffer large + * enough to get enough keys to fill the return keys buffer. + */ + rse_len = struct_size(rse, regctl_eds, num_keys); + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds); + if (ret) + goto free_rse; + + keys_info->generation = le32_to_cpu(rse->gen); + keys_info->num_keys = get_unaligned_le16(&rse->regctl); + + num_keys = min(num_keys, keys_info->num_keys); + for (i = 0; i < num_keys; i++) { + if (eds) { + keys_info->keys[i] = + le64_to_cpu(rse->regctl_eds[i].rkey); + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey); + } + } + +free_rse: + kfree(rse); + return ret; +} + +static int nvme_pr_read_reservation(struct block_device *bdev, + struct pr_held_reservation *resv) +{ + struct nvme_reservation_status_ext tmp_rse, *rse; + int ret, i, num_regs; + u32 rse_len; + bool eds; + +get_num_regs: + /* + * Get the number of registrations so we know how big to allocate + * the response buffer. + */ + ret = nvme_pr_resv_report(bdev, &tmp_rse, sizeof(tmp_rse), &eds); + if (ret) + return ret; + + num_regs = get_unaligned_le16(&tmp_rse.regctl); + if (!num_regs) { + resv->generation = le32_to_cpu(tmp_rse.gen); + return 0; + } + + rse_len = struct_size(rse, regctl_eds, num_regs); + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds); + if (ret) + goto free_rse; + + if (num_regs != get_unaligned_le16(&rse->regctl)) { + kfree(rse); + goto get_num_regs; + } + + resv->generation = le32_to_cpu(rse->gen); + resv->type = block_pr_type_from_nvme(rse->rtype); + + for (i = 0; i < num_regs; i++) { + if (eds) { + if (rse->regctl_eds[i].rcsts) { + resv->key = le64_to_cpu(rse->regctl_eds[i].rkey); + break; + } + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + if (rs->regctl_ds[i].rcsts) { + resv->key = le64_to_cpu(rs->regctl_ds[i].rkey); + break; + } + } + } + +free_rse: + kfree(rse); + return ret; +} + +const struct pr_ops nvme_pr_ops = { + .pr_register = nvme_pr_register, + .pr_reserve = nvme_pr_reserve, + .pr_release = nvme_pr_release, + .pr_preempt = nvme_pr_preempt, + .pr_clear = nvme_pr_clear, + .pr_read_keys = nvme_pr_read_keys, + .pr_read_reservation = nvme_pr_read_reservation, +}; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0eb79696fb73..d433b2ec07a6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -501,7 +501,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) } ibdev = queue->device->dev; - /* +1 for ib_stop_cq */ + /* +1 for ib_drain_qp */ queue->cq_size = cq_factor * queue->queue_size + 1; ret = nvme_rdma_create_cq(ibdev, queue); @@ -713,18 +713,10 @@ out_stop_queues: static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - struct ib_device *ibdev = ctrl->device->dev; - unsigned int nr_io_queues, nr_default_queues; - unsigned int nr_read_queues, nr_poll_queues; + unsigned int nr_io_queues; int i, ret; - nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors, - min(opts->nr_io_queues, num_online_cpus())); - nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors, - min(opts->nr_write_queues, num_online_cpus())); - nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus()); - nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues; - + nr_io_queues = nvmf_nr_io_queues(opts); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) return ret; @@ -739,34 +731,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); - if (opts->nr_write_queues && nr_read_queues < nr_io_queues) { - /* - * separate read/write queues - * hand out dedicated default queues only after we have - * sufficient read queues. - */ - ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues; - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ]; - ctrl->io_queues[HCTX_TYPE_DEFAULT] = - min(nr_default_queues, nr_io_queues); - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } else { - /* - * shared read/write queues - * either no write queues were requested, or we don't have - * sufficient queue count to have dedicated default queues. - */ - ctrl->io_queues[HCTX_TYPE_DEFAULT] = - min(nr_read_queues, nr_io_queues); - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } - - if (opts->nr_poll_queues && nr_io_queues) { - /* map dedicated poll queues only if we have queues left */ - ctrl->io_queues[HCTX_TYPE_POLL] = - min(nr_poll_queues, nr_io_queues); - } - + nvmf_set_io_queues(opts, nr_io_queues, ctrl->io_queues); for (i = 1; i < ctrl->ctrl.queue_count; i++) { ret = nvme_rdma_alloc_queue(ctrl, i, ctrl->ctrl.sqsize + 1); @@ -2138,44 +2103,8 @@ static void nvme_rdma_complete_rq(struct request *rq) static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { - /* separate read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; - set->map[HCTX_TYPE_READ].nr_queues = - ctrl->io_queues[HCTX_TYPE_READ]; - set->map[HCTX_TYPE_READ].queue_offset = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } else { - /* shared read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; - set->map[HCTX_TYPE_READ].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_READ].queue_offset = 0; - } - blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); - blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); - - if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) { - /* map dedicated poll queues only if we have queues left */ - set->map[HCTX_TYPE_POLL].nr_queues = - ctrl->io_queues[HCTX_TYPE_POLL]; - set->map[HCTX_TYPE_POLL].queue_offset = - ctrl->io_queues[HCTX_TYPE_DEFAULT] + - ctrl->io_queues[HCTX_TYPE_READ]; - blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); - } - - dev_info(ctrl->ctrl.device, - "mapped %d/%d/%d default/read/poll queues.\n", - ctrl->io_queues[HCTX_TYPE_DEFAULT], - ctrl->io_queues[HCTX_TYPE_READ], - ctrl->io_queues[HCTX_TYPE_POLL]); + nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); } static const struct blk_mq_ops nvme_rdma_mq_ops = { diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c new file mode 100644 index 000000000000..212e1b05d298 --- /dev/null +++ b/drivers/nvme/host/sysfs.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sysfs interface for the NVMe core driver. + * + * Copyright (c) 2011-2014, Intel Corporation. + */ + +#include <linux/nvme-auth.h> + +#include "nvme.h" +#include "fabrics.h" + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + int ret; + + ret = nvme_reset_ctrl_sync(ctrl); + if (ret < 0) + return ret; + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + +static ssize_t nvme_sysfs_rescan(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + nvme_queue_scan(ctrl); + return count; +} +static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); + +static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) +{ + struct gendisk *disk = dev_to_disk(dev); + + if (disk->fops == &nvme_bdev_ops) + return nvme_get_ns_from_dev(dev)->head; + else + return disk->private_data; +} + +static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_ns_head *head = dev_to_ns_head(dev); + struct nvme_ns_ids *ids = &head->ids; + struct nvme_subsystem *subsys = head->subsys; + int serial_len = sizeof(subsys->serial); + int model_len = sizeof(subsys->model); + + if (!uuid_is_null(&ids->uuid)) + return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid); + + if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + return sysfs_emit(buf, "eui.%16phN\n", ids->nguid); + + if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + return sysfs_emit(buf, "eui.%8phN\n", ids->eui64); + + while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || + subsys->serial[serial_len - 1] == '\0')) + serial_len--; + while (model_len > 0 && (subsys->model[model_len - 1] == ' ' || + subsys->model[model_len - 1] == '\0')) + model_len--; + + return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, + serial_len, subsys->serial, model_len, subsys->model, + head->ns_id); +} +static DEVICE_ATTR_RO(wwid); + +static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); +} +static DEVICE_ATTR_RO(nguid); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; + + /* For backward compatibility expose the NGUID to userspace if + * we have no UUID set + */ + if (uuid_is_null(&ids->uuid)) { + dev_warn_once(dev, + "No UUID available providing old NGUID\n"); + return sysfs_emit(buf, "%pU\n", ids->nguid); + } + return sysfs_emit(buf, "%pU\n", &ids->uuid); +} +static DEVICE_ATTR_RO(uuid); + +static ssize_t eui_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); +} +static DEVICE_ATTR_RO(eui); + +static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id); +} +static DEVICE_ATTR_RO(nsid); + +static struct attribute *nvme_ns_id_attrs[] = { + &dev_attr_wwid.attr, + &dev_attr_uuid.attr, + &dev_attr_nguid.attr, + &dev_attr_eui.attr, + &dev_attr_nsid.attr, +#ifdef CONFIG_NVME_MULTIPATH + &dev_attr_ana_grpid.attr, + &dev_attr_ana_state.attr, +#endif + NULL, +}; + +static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids; + + if (a == &dev_attr_uuid.attr) { + if (uuid_is_null(&ids->uuid) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + return 0; + } + if (a == &dev_attr_nguid.attr) { + if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + return 0; + } + if (a == &dev_attr_eui.attr) { + if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + return 0; + } +#ifdef CONFIG_NVME_MULTIPATH + if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { + if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ + return 0; + if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) + return 0; + } +#endif + return a->mode; +} + +static const struct attribute_group nvme_ns_id_attr_group = { + .attrs = nvme_ns_id_attrs, + .is_visible = nvme_ns_id_attrs_are_visible, +}; + +const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, + NULL, +}; + +#define nvme_show_str_function(field) \ +static ssize_t field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ + return sysfs_emit(buf, "%.*s\n", \ + (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ +} \ +static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); + +nvme_show_str_function(model); +nvme_show_str_function(serial); +nvme_show_str_function(firmware_rev); + +#define nvme_show_int_function(field) \ +static ssize_t field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ + return sysfs_emit(buf, "%d\n", ctrl->field); \ +} \ +static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); + +nvme_show_int_function(cntlid); +nvme_show_int_function(numa_node); +nvme_show_int_function(queue_count); +nvme_show_int_function(sqsize); +nvme_show_int_function(kato); + +static ssize_t nvme_sysfs_delete(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) + return -EBUSY; + + if (device_remove_file_self(dev, attr)) + nvme_delete_ctrl_sync(ctrl); + return count; +} +static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete); + +static ssize_t nvme_sysfs_show_transport(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", ctrl->ops->name); +} +static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL); + +static ssize_t nvme_sysfs_show_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + static const char *const state_name[] = { + [NVME_CTRL_NEW] = "new", + [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_RESETTING] = "resetting", + [NVME_CTRL_CONNECTING] = "connecting", + [NVME_CTRL_DELETING] = "deleting", + [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)", + [NVME_CTRL_DEAD] = "dead", + }; + + if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && + state_name[ctrl->state]) + return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); + + return sysfs_emit(buf, "unknown state\n"); +} + +static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); + +static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", ctrl->subsys->subnqn); +} +static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); + +static ssize_t nvme_sysfs_show_hostnqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%s\n", ctrl->opts->host->nqn); +} +static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL); + +static ssize_t nvme_sysfs_show_hostid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%pU\n", &ctrl->opts->host->id); +} +static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL); + +static ssize_t nvme_sysfs_show_address(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE); +} +static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL); + +static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + + if (ctrl->opts->max_reconnects == -1) + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", + opts->max_reconnects * opts->reconnect_delay); +} + +static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + int ctrl_loss_tmo, err; + + err = kstrtoint(buf, 10, &ctrl_loss_tmo); + if (err) + return -EINVAL; + + if (ctrl_loss_tmo < 0) + opts->max_reconnects = -1; + else + opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, + opts->reconnect_delay); + return count; +} +static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR, + nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store); + +static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->opts->reconnect_delay == -1) + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay); +} + +static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned int v; + int err; + + err = kstrtou32(buf, 10, &v); + if (err) + return err; + + ctrl->opts->reconnect_delay = v; + return count; +} +static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, + nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store); + +static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->opts->fast_io_fail_tmo == -1) + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo); +} + +static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + int fast_io_fail_tmo, err; + + err = kstrtoint(buf, 10, &fast_io_fail_tmo); + if (err) + return -EINVAL; + + if (fast_io_fail_tmo < 0) + opts->fast_io_fail_tmo = -1; + else + opts->fast_io_fail_tmo = fast_io_fail_tmo; + return count; +} +static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, + nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); + +static ssize_t cntrltype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_CTRL_IO] = "io\n", + [NVME_CTRL_DISC] = "discovery\n", + [NVME_CTRL_ADMIN] = "admin\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->cntrltype]); +} +static DEVICE_ATTR_RO(cntrltype); + +static ssize_t dctype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const char * const type[] = { + [NVME_DCTYPE_NOT_REPORTED] = "none\n", + [NVME_DCTYPE_DDC] = "ddc\n", + [NVME_DCTYPE_CDC] = "cdc\n", + }; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype]) + return sysfs_emit(buf, "reserved\n"); + + return sysfs_emit(buf, type[ctrl->dctype]); +} +static DEVICE_ATTR_RO(dctype); + +#ifdef CONFIG_NVME_AUTH +static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + + if (!opts->dhchap_secret) + return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%s\n", opts->dhchap_secret); +} + +static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + char *dhchap_secret; + + if (!ctrl->opts->dhchap_secret) + return -EINVAL; + if (count < 7) + return -EINVAL; + if (memcmp(buf, "DHHC-1:", 7)) + return -EINVAL; + + dhchap_secret = kzalloc(count + 1, GFP_KERNEL); + if (!dhchap_secret) + return -ENOMEM; + memcpy(dhchap_secret, buf, count); + nvme_auth_stop(ctrl); + if (strcmp(dhchap_secret, opts->dhchap_secret)) { + struct nvme_dhchap_key *key, *host_key; + int ret; + + ret = nvme_auth_generate_key(dhchap_secret, &key); + if (ret) { + kfree(dhchap_secret); + return ret; + } + kfree(opts->dhchap_secret); + opts->dhchap_secret = dhchap_secret; + host_key = ctrl->host_key; + mutex_lock(&ctrl->dhchap_auth_mutex); + ctrl->host_key = key; + mutex_unlock(&ctrl->dhchap_auth_mutex); + nvme_auth_free_key(host_key); + } else + kfree(dhchap_secret); + /* Start re-authentication */ + dev_info(ctrl->device, "re-authenticating controller\n"); + queue_work(nvme_wq, &ctrl->dhchap_auth_work); + + return count; +} + +static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR, + nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store); + +static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + + if (!opts->dhchap_ctrl_secret) + return sysfs_emit(buf, "none\n"); + return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret); +} + +static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + char *dhchap_secret; + + if (!ctrl->opts->dhchap_ctrl_secret) + return -EINVAL; + if (count < 7) + return -EINVAL; + if (memcmp(buf, "DHHC-1:", 7)) + return -EINVAL; + + dhchap_secret = kzalloc(count + 1, GFP_KERNEL); + if (!dhchap_secret) + return -ENOMEM; + memcpy(dhchap_secret, buf, count); + nvme_auth_stop(ctrl); + if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) { + struct nvme_dhchap_key *key, *ctrl_key; + int ret; + + ret = nvme_auth_generate_key(dhchap_secret, &key); + if (ret) { + kfree(dhchap_secret); + return ret; + } + kfree(opts->dhchap_ctrl_secret); + opts->dhchap_ctrl_secret = dhchap_secret; + ctrl_key = ctrl->ctrl_key; + mutex_lock(&ctrl->dhchap_auth_mutex); + ctrl->ctrl_key = key; + mutex_unlock(&ctrl->dhchap_auth_mutex); + nvme_auth_free_key(ctrl_key); + } else + kfree(dhchap_secret); + /* Start re-authentication */ + dev_info(ctrl->device, "re-authenticating controller\n"); + queue_work(nvme_wq, &ctrl->dhchap_auth_work); + + return count; +} + +static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR, + nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store); +#endif + +static struct attribute *nvme_dev_attrs[] = { + &dev_attr_reset_controller.attr, + &dev_attr_rescan_controller.attr, + &dev_attr_model.attr, + &dev_attr_serial.attr, + &dev_attr_firmware_rev.attr, + &dev_attr_cntlid.attr, + &dev_attr_delete_controller.attr, + &dev_attr_transport.attr, + &dev_attr_subsysnqn.attr, + &dev_attr_address.attr, + &dev_attr_state.attr, + &dev_attr_numa_node.attr, + &dev_attr_queue_count.attr, + &dev_attr_sqsize.attr, + &dev_attr_hostnqn.attr, + &dev_attr_hostid.attr, + &dev_attr_ctrl_loss_tmo.attr, + &dev_attr_reconnect_delay.attr, + &dev_attr_fast_io_fail_tmo.attr, + &dev_attr_kato.attr, + &dev_attr_cntrltype.attr, + &dev_attr_dctype.attr, +#ifdef CONFIG_NVME_AUTH + &dev_attr_dhchap_secret.attr, + &dev_attr_dhchap_ctrl_secret.attr, +#endif + NULL +}; + +static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl) + return 0; + if (a == &dev_attr_address.attr && !ctrl->ops->get_address) + return 0; + if (a == &dev_attr_hostnqn.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_hostid.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts) + return 0; +#ifdef CONFIG_NVME_AUTH + if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts) + return 0; + if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts) + return 0; +#endif + + return a->mode; +} + +const struct attribute_group nvme_dev_attrs_group = { + .attrs = nvme_dev_attrs, + .is_visible = nvme_dev_attrs_are_visible, +}; +EXPORT_SYMBOL_GPL(nvme_dev_attrs_group); + +const struct attribute_group *nvme_dev_attr_groups[] = { + &nvme_dev_attrs_group, + NULL, +}; + +#define SUBSYS_ATTR_RO(_name, _mode, _show) \ + struct device_attribute subsys_attr_##_name = \ + __ATTR(_name, _mode, _show, NULL) + +static ssize_t nvme_subsys_show_nqn(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + return sysfs_emit(buf, "%s\n", subsys->subnqn); +} +static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); + +static ssize_t nvme_subsys_show_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + switch (subsys->subtype) { + case NVME_NQN_DISC: + return sysfs_emit(buf, "discovery\n"); + case NVME_NQN_NVME: + return sysfs_emit(buf, "nvm\n"); + default: + return sysfs_emit(buf, "reserved\n"); + } +} +static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type); + +#define nvme_subsys_show_str_function(field) \ +static ssize_t subsys_##field##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct nvme_subsystem *subsys = \ + container_of(dev, struct nvme_subsystem, dev); \ + return sysfs_emit(buf, "%.*s\n", \ + (int)sizeof(subsys->field), subsys->field); \ +} \ +static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); + +nvme_subsys_show_str_function(model); +nvme_subsys_show_str_function(serial); +nvme_subsys_show_str_function(firmware_rev); + +static struct attribute *nvme_subsys_attrs[] = { + &subsys_attr_model.attr, + &subsys_attr_serial.attr, + &subsys_attr_firmware_rev.attr, + &subsys_attr_subsysnqn.attr, + &subsys_attr_subsystype.attr, +#ifdef CONFIG_NVME_MULTIPATH + &subsys_attr_iopolicy.attr, +#endif + NULL, +}; + +static const struct attribute_group nvme_subsys_attrs_group = { + .attrs = nvme_subsys_attrs, +}; + +const struct attribute_group *nvme_subsys_attrs_groups[] = { + &nvme_subsys_attrs_group, + NULL, +}; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index bf0230442d57..9ce417cd32a7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -997,25 +997,28 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) u32 h2cdata_left = req->h2cdata_left; while (true) { + struct bio_vec bvec; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, + }; struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); int req_data_sent = req->data_sent; - int ret, flags = MSG_DONTWAIT; + int ret; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) - flags |= MSG_EOR; + msg.msg_flags |= MSG_EOR; else - flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + msg.msg_flags |= MSG_MORE; - if (sendpage_ok(page)) { - ret = kernel_sendpage(queue->sock, page, offset, len, - flags); - } else { - ret = sock_no_sendpage(queue->sock, page, offset, len, - flags); - } + if (!sendpage_ok(page)) + msg.msg_flags &= ~MSG_SPLICE_PAGES; + + bvec_set_page(&bvec, page, len, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); + ret = sock_sendmsg(queue->sock, &msg); if (ret <= 0) return ret; @@ -1054,22 +1057,24 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); + struct bio_vec bvec; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; bool inline_data = nvme_tcp_has_inline_data(req); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) + hdgst - req->offset; - int flags = MSG_DONTWAIT; int ret; if (inline_data || nvme_tcp_queue_more(queue)) - flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + msg.msg_flags |= MSG_MORE; else - flags |= MSG_EOR; + msg.msg_flags |= MSG_EOR; if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); - ret = kernel_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, flags); + bvec_set_virt(&bvec, (void *)pdu + req->offset, len); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); + ret = sock_sendmsg(queue->sock, &msg); if (unlikely(ret <= 0)) return ret; @@ -1093,6 +1098,8 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req); + struct bio_vec bvec; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, }; u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) - req->offset + hdgst; int ret; @@ -1101,13 +1108,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); if (!req->h2cdata_left) - ret = kernel_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); - else - ret = sock_no_sendpage(queue->sock, virt_to_page(pdu), - offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE); + msg.msg_flags |= MSG_SPLICE_PAGES; + + bvec_set_virt(&bvec, (void *)pdu + req->offset, len); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); + ret = sock_sendmsg(queue->sock, &msg); if (unlikely(ret <= 0)) return ret; @@ -1802,58 +1807,12 @@ out_free_queues: return ret; } -static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl) -{ - unsigned int nr_io_queues; - - nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus()); - nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus()); - nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus()); - - return nr_io_queues; -} - -static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl, - unsigned int nr_io_queues) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct nvmf_ctrl_options *opts = nctrl->opts; - - if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) { - /* - * separate read/write queues - * hand out dedicated default queues only after we have - * sufficient read queues. - */ - ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues; - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ]; - ctrl->io_queues[HCTX_TYPE_DEFAULT] = - min(opts->nr_write_queues, nr_io_queues); - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } else { - /* - * shared read/write queues - * either no write queues were requested, or we don't have - * sufficient queue count to have dedicated default queues. - */ - ctrl->io_queues[HCTX_TYPE_DEFAULT] = - min(opts->nr_io_queues, nr_io_queues); - nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } - - if (opts->nr_poll_queues && nr_io_queues) { - /* map dedicated poll queues only if we have queues left */ - ctrl->io_queues[HCTX_TYPE_POLL] = - min(opts->nr_poll_queues, nr_io_queues); - } -} - static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { unsigned int nr_io_queues; int ret; - nr_io_queues = nvme_tcp_nr_io_queues(ctrl); + nr_io_queues = nvmf_nr_io_queues(ctrl->opts); ret = nvme_set_queue_count(ctrl, &nr_io_queues); if (ret) return ret; @@ -1868,8 +1827,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); - nvme_tcp_set_io_queues(ctrl, nr_io_queues); - + nvmf_set_io_queues(ctrl->opts, nr_io_queues, + to_tcp_ctrl(ctrl)->io_queues); return __nvme_tcp_alloc_io_queues(ctrl); } @@ -2449,44 +2408,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; - - if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { - /* separate read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; - set->map[HCTX_TYPE_READ].nr_queues = - ctrl->io_queues[HCTX_TYPE_READ]; - set->map[HCTX_TYPE_READ].queue_offset = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - } else { - /* shared read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; - set->map[HCTX_TYPE_READ].nr_queues = - ctrl->io_queues[HCTX_TYPE_DEFAULT]; - set->map[HCTX_TYPE_READ].queue_offset = 0; - } - blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); - blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); - - if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) { - /* map dedicated poll queues only if we have queues left */ - set->map[HCTX_TYPE_POLL].nr_queues = - ctrl->io_queues[HCTX_TYPE_POLL]; - set->map[HCTX_TYPE_POLL].queue_offset = - ctrl->io_queues[HCTX_TYPE_DEFAULT] + - ctrl->io_queues[HCTX_TYPE_READ]; - blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); - } - - dev_info(ctrl->ctrl.device, - "mapped %d/%d/%d default/read/poll queues.\n", - ctrl->io_queues[HCTX_TYPE_DEFAULT], - ctrl->io_queues[HCTX_TYPE_READ], - ctrl->io_queues[HCTX_TYPE_POLL]); + + nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); } static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 12316ab51bda..ec8557810c21 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -10,12 +10,11 @@ int nvme_revalidate_zones(struct nvme_ns *ns) { struct request_queue *q = ns->queue; - int ret; - ret = blk_revalidate_disk_zones(ns->disk, NULL); - if (!ret) - blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); - return ret; + blk_queue_chunk_sectors(q, ns->zsze); + blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append); + + return blk_revalidate_disk_zones(ns->disk, NULL); } static int nvme_set_max_append(struct nvme_ctrl *ctrl) |