diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 170 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 25 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 11 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 33 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 31 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 189 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 27 | ||||
-rw-r--r-- | drivers/nvme/host/scsi.c | 84 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 96 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd.c | 3 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 2 |
14 files changed, 382 insertions, 297 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2feacc70bf61..79e679d12f3b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -156,12 +156,14 @@ static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - if (ns->type == NVME_NS_LIGHTNVM) - nvme_nvm_unregister(ns->queue, ns->disk->disk_name); + if (ns->ndev) + nvme_nvm_unregister(ns); - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); + if (ns->disk) { + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + } put_disk(ns->disk); ida_simple_remove(&ns->ctrl->ns_ida, ns->instance); @@ -552,7 +554,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(1); + c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL); *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) @@ -570,7 +572,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n struct nvme_command c = { }; c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(2); + c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST); c.identify.nsid = cpu_to_le32(nsid); return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } @@ -597,7 +599,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, } int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result) + void *buffer, size_t buflen, u32 *result) { struct nvme_command c; struct nvme_completion cqe; @@ -606,10 +608,9 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; c.features.nsid = cpu_to_le32(nsid); - c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); @@ -617,7 +618,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, } int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result) + void *buffer, size_t buflen, u32 *result) { struct nvme_command c; struct nvme_completion cqe; @@ -625,12 +626,11 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; - c.features.dptr.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, + buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; @@ -664,7 +664,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) u32 result; int status, nr_io_queues; - status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0, + status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); if (status < 0) return status; @@ -888,42 +888,32 @@ static void nvme_config_discard(struct nvme_ns *ns) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); } -static int nvme_revalidate_disk(struct gendisk *disk) +static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) { - struct nvme_ns *ns = disk->private_data; - struct nvme_id_ns *id; - u8 lbaf, pi_type; - u16 old_ms; - unsigned short bs; - - if (test_bit(NVME_NS_DEAD, &ns->flags)) { - set_capacity(disk, 0); - return -ENODEV; - } - if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { - dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n", - __func__); + if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { + dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__); return -ENODEV; } - if (id->ncap == 0) { - kfree(id); + + if ((*id)->ncap == 0) { + kfree(*id); return -ENODEV; } - if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { - if (nvme_nvm_register(ns->queue, disk->disk_name)) { - dev_warn(disk_to_dev(ns->disk), - "%s: LightNVM init failure\n", __func__); - kfree(id); - return -ENODEV; - } - ns->type = NVME_NS_LIGHTNVM; - } + if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) + memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); + if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) + memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid)); + + return 0; +} - if (ns->ctrl->vs >= NVME_VS(1, 1)) - memcpy(ns->eui, id->eui64, sizeof(ns->eui)); - if (ns->ctrl->vs >= NVME_VS(1, 2)) - memcpy(ns->uuid, id->nguid, sizeof(ns->uuid)); +static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +{ + struct nvme_ns *ns = disk->private_data; + u8 lbaf, pi_type; + u16 old_ms; + unsigned short bs; old_ms = ns->ms; lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; @@ -962,8 +952,26 @@ static int nvme_revalidate_disk(struct gendisk *disk) if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) nvme_config_discard(ns); blk_mq_unfreeze_queue(disk->queue); +} +static int nvme_revalidate_disk(struct gendisk *disk) +{ + struct nvme_ns *ns = disk->private_data; + struct nvme_id_ns *id = NULL; + int ret; + + if (test_bit(NVME_NS_DEAD, &ns->flags)) { + set_capacity(disk, 0); + return -ENODEV; + } + + ret = nvme_revalidate_ns(ns, &id); + if (ret) + return ret; + + __nvme_revalidate_disk(disk, id); kfree(id); + return 0; } @@ -1078,6 +1086,8 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) int ret; while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { + if (csts == ~0) + return -ENODEV; if ((csts & NVME_CSTS_RDY) == bit) break; @@ -1232,7 +1242,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } page_shift = NVME_CAP_MPSMIN(cap) + 12; - if (ctrl->vs >= NVME_VS(1, 1)) + if (ctrl->vs >= NVME_VS(1, 1, 0)) ctrl->subsystem = NVME_CAP_NSSRC(cap); ret = nvme_identify_ctrl(ctrl, &id); @@ -1425,7 +1435,7 @@ static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); struct nvme_ctrl *ctrl = ns->ctrl; int serial_len = sizeof(ctrl->serial); int model_len = sizeof(ctrl->model); @@ -1449,7 +1459,7 @@ static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%pU\n", ns->uuid); } static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); @@ -1457,7 +1467,7 @@ static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%8phd\n", ns->eui); } static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); @@ -1465,7 +1475,7 @@ static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); return sprintf(buf, "%d\n", ns->ns_id); } static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); @@ -1482,7 +1492,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, struct device, kobj); - struct nvme_ns *ns = dev_to_disk(dev)->private_data; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); if (a == &dev_attr_uuid.attr) { if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) @@ -1642,6 +1652,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; struct gendisk *disk; + struct nvme_id_ns *id; + char disk_name[DISK_NAME_LEN]; int node = dev_to_node(ctrl->dev); ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); @@ -1659,34 +1671,49 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue->queuedata = ns; ns->ctrl = ctrl; - disk = alloc_disk_node(0, node); - if (!disk) - goto out_free_queue; - kref_init(&ns->kref); ns->ns_id = nsid; - ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ - blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - disk->fops = &nvme_fops; - disk->private_data = ns; - disk->queue = ns->queue; - disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance); + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); + + if (nvme_revalidate_ns(ns, &id)) + goto out_free_queue; + + if (nvme_nvm_ns_supported(ns, id)) { + if (nvme_nvm_register(ns, disk_name, node, + &nvme_ns_attr_group)) { + dev_warn(ctrl->dev, "%s: LightNVM init failure\n", + __func__); + goto out_free_id; + } + } else { + disk = alloc_disk_node(0, node); + if (!disk) + goto out_free_id; - if (nvme_revalidate_disk(ns->disk)) - goto out_free_disk; + disk->fops = &nvme_fops; + disk->private_data = ns; + disk->queue = ns->queue; + disk->flags = GENHD_FL_EXT_DEVT; + memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); + ns->disk = disk; + + __nvme_revalidate_disk(disk, id); + } mutex_lock(&ctrl->namespaces_mutex); list_add_tail(&ns->list, &ctrl->namespaces); mutex_unlock(&ctrl->namespaces_mutex); kref_get(&ctrl->kref); - if (ns->type == NVME_NS_LIGHTNVM) + + kfree(id); + + if (ns->ndev) return; device_add_disk(ctrl->device, ns->disk); @@ -1695,8 +1722,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) pr_warn("%s: failed to create sysfs group for identification\n", ns->disk->disk_name); return; - out_free_disk: - kfree(disk); + out_free_id: + kfree(id); out_free_queue: blk_cleanup_queue(ns->queue); out_release_instance: @@ -1710,7 +1737,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; - if (ns->disk->flags & GENHD_FL_UP) { + if (ns->disk && ns->disk->flags & GENHD_FL_UP) { if (blk_get_integrity(ns->disk)) blk_integrity_unregister(ns->disk); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, @@ -1733,7 +1760,7 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns = nvme_find_get_ns(ctrl, nsid); if (ns) { - if (revalidate_disk(ns->disk)) + if (ns->disk && revalidate_disk(ns->disk)) nvme_ns_remove(ns); nvme_put_ns(ns); } else @@ -1815,7 +1842,7 @@ static void nvme_scan_work(struct work_struct *work) return; nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1) && + if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { if (!nvme_scan_ns_list(ctrl, nn)) goto done; @@ -1826,9 +1853,6 @@ static void nvme_scan_work(struct work_struct *work) list_sort(NULL, &ctrl->namespaces, ns_cmp); mutex_unlock(&ctrl->namespaces_mutex); kfree(id); - - if (ctrl->ops->post_scan) - ctrl->ops->post_scan(ctrl); } void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -2038,7 +2062,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) * Revalidating a dead namespace sets capacity to 0. This will * end buffered writers dirtying pages that can't be synced. */ - if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags)) + if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags)) revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 4eff49174466..5a3f008d3480 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -111,8 +111,19 @@ static void nvmf_host_put(struct nvmf_host *host) */ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) { - return snprintf(buf, size, "traddr=%s,trsvcid=%s\n", - ctrl->opts->traddr, ctrl->opts->trsvcid); + int len = 0; + + if (ctrl->opts->mask & NVMF_OPT_TRADDR) + len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr); + if (ctrl->opts->mask & NVMF_OPT_TRSVCID) + len += snprintf(buf + len, size - len, "%strsvcid=%s", + (len) ? "," : "", ctrl->opts->trsvcid); + if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) + len += snprintf(buf + len, size - len, "%shost_traddr=%s", + (len) ? "," : "", ctrl->opts->host_traddr); + len += snprintf(buf + len, size - len, "\n"); + + return len; } EXPORT_SYMBOL_GPL(nvmf_get_address); @@ -519,6 +530,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, + { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_ERR, NULL } }; @@ -675,6 +687,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->reconnect_delay = token; break; + case NVMF_OPT_HOST_TRADDR: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + opts->host_traddr = p; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -741,6 +761,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts) kfree(opts->traddr); kfree(opts->trsvcid); kfree(opts->subsysnqn); + kfree(opts->host_traddr); kfree(opts); } EXPORT_SYMBOL_GPL(nvmf_free_options); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 46e460aee52d..924145c979f1 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -52,6 +52,7 @@ enum { NVMF_OPT_KATO = 1 << 7, NVMF_OPT_HOSTNQN = 1 << 8, NVMF_OPT_RECONNECT_DELAY = 1 << 9, + NVMF_OPT_HOST_TRADDR = 1 << 10, }; /** @@ -64,9 +65,12 @@ enum { * being added. * @subsysnqn: Hold the fully qualified NQN subystem name (format defined * in the NVMe specification, "NVMe Qualified Names"). - * @traddr: network address that will be used by the host to communicate - * to the added NVMe controller. - * @trsvcid: network port used for host-controller communication. + * @traddr: The transport-specific TRADDR field for a port on the + * subsystem which is adding a controller. + * @trsvcid: The transport-specific TRSVCID field for a port on the + * subsystem which is adding a controller. + * @host_traddr: A transport-specific field identifying the NVME host port + * to use for the connection to the controller. * @queue_size: Number of IO queue elements. * @nr_io_queues: Number of controller IO queues that will be established. * @reconnect_delay: Time between two consecutive reconnect attempts. @@ -80,6 +84,7 @@ struct nvmf_ctrl_options { char *subsysnqn; char *traddr; char *trsvcid; + char *host_traddr; size_t queue_size; unsigned int nr_io_queues; unsigned int reconnect_delay; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 63f483daf930..f5e3011e31fc 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -475,7 +475,7 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, - rqd->bio->bi_iter.bi_sector)); + rqd->bio->bi_iter.bi_sector)); } static void nvme_nvm_end_io(struct request *rq, int error) @@ -592,14 +592,37 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .max_phys_sect = 64, }; -int nvme_nvm_register(struct request_queue *q, char *disk_name) +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, + const struct attribute_group *attrs) { - return nvm_register(q, disk_name, &nvme_nvm_dev_ops); + struct request_queue *q = ns->queue; + struct nvm_dev *dev; + int ret; + + dev = nvm_alloc_dev(node); + if (!dev) + return -ENOMEM; + + dev->q = q; + memcpy(dev->name, disk_name, DISK_NAME_LEN); + dev->ops = &nvme_nvm_dev_ops; + dev->parent_dev = ns->ctrl->device; + dev->private_data = ns; + ns->ndev = dev; + + ret = nvm_register(dev); + + ns->lba_shift = ilog2(dev->sec_size) - 9; + + if (sysfs_create_group(&dev->dev.kobj, attrs)) + pr_warn("%s: failed to create sysfs group for identification\n", + disk_name); + return ret; } -void nvme_nvm_unregister(struct request_queue *q, char *disk_name) +void nvme_nvm_unregister(struct nvme_ns *ns) { - nvm_unregister(disk_name); + nvm_unregister(ns->ndev); } /* move to shared place when used in multiple places. */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ab18b78102bf..d47f5a5d18c7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -18,6 +18,7 @@ #include <linux/pci.h> #include <linux/kref.h> #include <linux/blk-mq.h> +#include <linux/lightnvm.h> enum { /* @@ -154,6 +155,7 @@ struct nvme_ns { struct nvme_ctrl *ctrl; struct request_queue *queue; struct gendisk *disk; + struct nvm_dev *ndev; struct kref kref; int instance; @@ -165,7 +167,6 @@ struct nvme_ns { u16 ms; bool ext; u8 pi_type; - int type; unsigned long flags; #define NVME_NS_REMOVING 0 @@ -184,7 +185,6 @@ struct nvme_ctrl_ops { int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); int (*reset_ctrl)(struct nvme_ctrl *ctrl); void (*free_ctrl)(struct nvme_ctrl *ctrl); - void (*post_scan)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); int (*delete_ctrl)(struct nvme_ctrl *ctrl); const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl); @@ -292,9 +292,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, struct nvme_id_ns **id); int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log); int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); + void *buffer, size_t buflen, u32 *result); int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); + void *buffer, size_t buflen, u32 *result); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); @@ -307,20 +307,35 @@ int nvme_sg_get_version_num(int __user *ip); #ifdef CONFIG_NVM int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); -int nvme_nvm_register(struct request_queue *q, char *disk_name); -void nvme_nvm_unregister(struct request_queue *q, char *disk_name); +int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node, + const struct attribute_group *attrs); +void nvme_nvm_unregister(struct nvme_ns *ns); + +static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) +{ + if (dev->type->devnode) + return dev_to_disk(dev)->private_data; + + return (container_of(dev, struct nvm_dev, dev))->private_data; +} #else -static inline int nvme_nvm_register(struct request_queue *q, char *disk_name) +static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, + int node, + const struct attribute_group *attrs) { return 0; } -static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {}; +static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) { return 0; } +static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) +{ + return dev_to_disk(dev)->private_data; +} #endif /* CONFIG_NVM */ int __init nvme_core_init(void); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 60f7eab11865..0248d0e21fee 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -16,6 +16,7 @@ #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> +#include <linux/blk-mq-pci.h> #include <linux/cpu.h> #include <linux/delay.h> #include <linux/errno.h> @@ -88,7 +89,6 @@ struct nvme_dev { unsigned max_qid; int q_depth; u32 db_stride; - struct msix_entry *entry; void __iomem *bar; struct work_struct reset_work; struct work_struct remove_work; @@ -99,6 +99,7 @@ struct nvme_dev { dma_addr_t cmb_dma_addr; u64 cmb_size; u32 cmbsz; + u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; }; @@ -201,6 +202,11 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev) nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES); } +static int nvmeq_irq(struct nvme_queue *nvmeq) +{ + return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector); +} + static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -263,6 +269,13 @@ static int nvme_init_request(void *data, struct request *req, return 0; } +static int nvme_pci_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_dev *dev = set->driver_data; + + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); +} + /** * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use @@ -503,7 +516,8 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; ret = BLK_MQ_RQ_QUEUE_BUSY; - if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir)) + if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, + DMA_ATTR_NO_WARN)) goto out; if (!nvme_setup_prps(dev, req, size)) @@ -880,7 +894,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); /* * Mark the request as handled, since the inline shutdown @@ -960,7 +974,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) spin_unlock_irq(&nvmeq->q_lock); return 1; } - vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; + vector = nvmeq_irq(nvmeq); nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); @@ -968,7 +982,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q); - irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); return 0; @@ -1075,15 +1088,14 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, return NULL; } -static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, - const char *name) +static int queue_request_irq(struct nvme_queue *nvmeq) { if (use_threaded_interrupts) - return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, - nvme_irq_check, nvme_irq, IRQF_SHARED, - name, nvmeq); - return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, - IRQF_SHARED, name, nvmeq); + return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check, + nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq); + else + return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED, + nvmeq->irqname, nvmeq); } static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) @@ -1114,7 +1126,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_cq; - result = queue_request_irq(dev, nvmeq, nvmeq->irqname); + result = queue_request_irq(nvmeq); if (result < 0) goto release_sq; @@ -1131,7 +1143,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_complete_rq, - .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, @@ -1141,9 +1152,9 @@ static struct blk_mq_ops nvme_mq_admin_ops = { static struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_complete_rq, - .map_queue = blk_mq_map_queue, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, + .map_queues = nvme_pci_map_queues, .timeout = nvme_timeout, .poll = nvme_poll, }; @@ -1204,7 +1215,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; - dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ? + dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? NVME_CAP_NSSRC(cap) : 0; if (dev->subsystem && @@ -1234,7 +1245,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) goto free_nvmeq; nvmeq->cq_vector = 0; - result = queue_request_irq(dev, nvmeq, nvmeq->irqname); + result = queue_request_irq(nvmeq); if (result) { nvmeq->cq_vector = -1; goto free_nvmeq; @@ -1281,7 +1292,7 @@ static void nvme_watchdog_timer(unsigned long data) /* Skip controllers under certain specific conditions. */ if (nvme_should_reset(dev, csts)) { - if (queue_work(nvme_workq, &dev->reset_work)) + if (!nvme_reset(dev)) dev_warn(dev->dev, "Failed status: 0x%x, reset controller.\n", csts); @@ -1321,28 +1332,37 @@ static int nvme_create_io_queues(struct nvme_dev *dev) return ret >= 0 ? 0 : ret; } +static ssize_t nvme_cmb_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return snprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n", + ndev->cmbloc, ndev->cmbsz); +} +static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); + static void __iomem *nvme_map_cmb(struct nvme_dev *dev) { u64 szu, size, offset; - u32 cmbloc; resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; dma_addr_t dma_addr; - if (!use_cmb_sqes) - return NULL; - dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) return NULL; + dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); - cmbloc = readl(dev->bar + NVME_REG_CMBLOC); + if (!use_cmb_sqes) + return NULL; szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); - offset = szu * NVME_CMB_OFST(cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc)); + offset = szu * NVME_CMB_OFST(dev->cmbloc); + bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); if (offset > bar_size) return NULL; @@ -1355,7 +1375,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset; + dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; cmb = ioremap_wc(dma_addr, size); if (!cmb) return NULL; @@ -1382,7 +1402,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, i, vecs, nr_io_queues, size; + int result, nr_io_queues, size; nr_io_queues = num_online_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); @@ -1417,29 +1437,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Deregister the admin queue's interrupt */ - free_irq(dev->entry[0].vector, adminq); + free_irq(pci_irq_vector(pdev, 0), adminq); /* * If we enable msix early due to not intx, disable it again before * setting up the full range we need. */ - if (pdev->msi_enabled) - pci_disable_msi(pdev); - else if (pdev->msix_enabled) - pci_disable_msix(pdev); - - for (i = 0; i < nr_io_queues; i++) - dev->entry[i].entry = i; - vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); - if (vecs < 0) { - vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32)); - if (vecs < 0) { - vecs = 1; - } else { - for (i = 0; i < vecs; i++) - dev->entry[i].vector = i + pdev->irq; - } - } + pci_free_irq_vectors(pdev); + nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY); + if (nr_io_queues <= 0) + return -EIO; + dev->max_qid = nr_io_queues; /* * Should investigate if there's a performance win from allocating @@ -1447,10 +1456,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * path to scale better, even if the receive path is limited by the * number of interrupts. */ - nr_io_queues = vecs; - dev->max_qid = nr_io_queues; - result = queue_request_irq(dev, adminq, adminq->irqname); + result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; goto free_queues; @@ -1462,23 +1469,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } -static void nvme_pci_post_scan(struct nvme_ctrl *ctrl) -{ - struct nvme_dev *dev = to_nvme_dev(ctrl); - struct nvme_queue *nvmeq; - int i; - - for (i = 0; i < dev->online_queues; i++) { - nvmeq = dev->queues[i]; - - if (!nvmeq->tags || !(*nvmeq->tags)) - continue; - - irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - blk_mq_tags_cpumask(*nvmeq->tags)); - } -} - static void nvme_del_queue_end(struct request *req, int error) { struct nvme_queue *nvmeq = req->end_io_data; @@ -1531,9 +1521,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev) +static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) { - int pass, queues = dev->online_queues - 1; + int pass; unsigned long timeout; u8 opcode = nvme_admin_delete_sq; @@ -1615,15 +1605,9 @@ static int nvme_pci_enable(struct nvme_dev *dev) * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll * adjust this later. */ - if (pci_enable_msix(pdev, dev->entry, 1)) { - pci_enable_msi(pdev); - dev->entry[0].vector = pdev->irq; - } - - if (!dev->entry[0].vector) { - result = -ENODEV; - goto disable; - } + result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (result < 0) + return result; cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -1642,9 +1626,25 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->q_depth); } - if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) + /* + * CMBs can currently only exist on >=1.2 PCIe devices. We only + * populate sysfs if a CMB is implemented. Note that we add the + * CMB attribute to the nvme_ctrl kobj which removes the need to remove + * it on exit. Since nvme_dev_attrs_group has no name we can pass + * NULL as final argument to sysfs_add_file_to_group. + */ + + if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) { dev->cmb = nvme_map_cmb(dev); + if (dev->cmbsz) { + if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL)) + dev_warn(dev->dev, + "failed to add sysfs attribute for CMB\n"); + } + } + pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); return 0; @@ -1665,10 +1665,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - if (pdev->msi_enabled) - pci_disable_msi(pdev); - else if (pdev->msix_enabled) - pci_disable_msix(pdev); + pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); @@ -1678,7 +1675,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; + int i, queues; u32 csts = -1; del_timer_sync(&dev->watchdog_timer); @@ -1689,6 +1686,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) csts = readl(dev->bar + NVME_REG_CSTS); } + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); @@ -1700,7 +1698,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (dev->queue_count) nvme_suspend_queue(dev->queues[0]); } else { - nvme_disable_io_queues(dev); + nvme_disable_io_queues(dev, queues); nvme_disable_admin_queue(dev, shutdown); } nvme_pci_disable(dev); @@ -1743,7 +1741,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); - kfree(dev->entry); kfree(dev); } @@ -1848,11 +1845,10 @@ static int nvme_reset(struct nvme_dev *dev) { if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) return -ENODEV; - + if (work_busy(&dev->reset_work)) + return -ENODEV; if (!queue_work(nvme_workq, &dev->reset_work)) return -EBUSY; - - flush_work(&dev->reset_work); return 0; } @@ -1876,7 +1872,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) { - return nvme_reset(to_nvme_dev(ctrl)); + struct nvme_dev *dev = to_nvme_dev(ctrl); + int ret = nvme_reset(dev); + + if (!ret) + flush_work(&dev->reset_work); + return ret; } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { @@ -1887,7 +1888,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read64 = nvme_pci_reg_read64, .reset_ctrl = nvme_pci_reset_ctrl, .free_ctrl = nvme_pci_free_ctrl, - .post_scan = nvme_pci_post_scan, .submit_async_event = nvme_pci_submit_async_event, }; @@ -1920,10 +1920,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return -ENOMEM; - dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry), - GFP_KERNEL, node); - if (!dev->entry) - goto free; dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), GFP_KERNEL, node); if (!dev->queues) @@ -1964,7 +1960,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvme_dev_unmap(dev); free: kfree(dev->queues); - kfree(dev->entry); kfree(dev); return result; } @@ -1976,7 +1971,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_disable(dev, false); else - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); } static void nvme_shutdown(struct pci_dev *pdev) @@ -2045,7 +2040,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - queue_work(nvme_workq, &ndev->reset_work); + nvme_reset(ndev); return 0; } #endif @@ -2084,7 +2079,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index fbdb2267e460..5a8388177959 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -54,7 +54,6 @@ struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *mr; struct kref ref; struct list_head entry; }; @@ -408,10 +407,7 @@ static void nvme_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - if (!register_always) - ib_dereg_mr(ndev->mr); ib_dealloc_pd(ndev->pd); - kfree(ndev); } @@ -444,24 +440,16 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev); + ndev->pd = ib_alloc_pd(ndev->dev, + register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(ndev->pd)) goto out_free_dev; - if (!register_always) { - ndev->mr = ib_get_dma_mr(ndev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(ndev->mr)) - goto out_free_pd; - } - if (!(ndev->dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { dev_err(&ndev->dev->dev, "Memory registrations not supported.\n"); - goto out_free_mr; + goto out_free_pd; } list_add(&ndev->entry, &device_list); @@ -469,9 +457,6 @@ out_unlock: mutex_unlock(&device_list_mutex); return ndev; -out_free_mr: - if (!register_always) - ib_dereg_mr(ndev->mr); out_free_pd: ib_dealloc_pd(ndev->pd); out_free_dev: @@ -915,7 +900,7 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); - put_unaligned_le32(queue->device->mr->rkey, sg->key); + put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; } @@ -1000,7 +985,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, nvme_rdma_queue_idx(queue)) return nvme_rdma_map_sg_inline(queue, req, c); - if (!register_always) + if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) return nvme_rdma_map_sg_single(queue, req, c); } @@ -1495,7 +1480,6 @@ static void nvme_rdma_complete_rq(struct request *rq) static struct blk_mq_ops nvme_rdma_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_rdma_init_request, .exit_request = nvme_rdma_exit_request, .reinit_request = nvme_rdma_reinit_request, @@ -1507,7 +1491,6 @@ static struct blk_mq_ops nvme_rdma_mq_ops = { static struct blk_mq_ops nvme_rdma_admin_mq_ops = { .queue_rq = nvme_rdma_queue_rq, .complete = nvme_rdma_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_rdma_init_admin_request, .exit_request = nvme_rdma_exit_admin_request, .reinit_request = nvme_rdma_reinit_request, diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index e947e298a737..3eaa4d27801e 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -72,15 +72,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define ALL_LUNS_RETURNED 0x02 #define ALL_WELL_KNOWN_LUNS_RETURNED 0x01 #define RESTRICTED_LUNS_RETURNED 0x00 -#define NVME_POWER_STATE_START_VALID 0x00 -#define NVME_POWER_STATE_ACTIVE 0x01 -#define NVME_POWER_STATE_IDLE 0x02 -#define NVME_POWER_STATE_STANDBY 0x03 -#define NVME_POWER_STATE_LU_CONTROL 0x07 -#define POWER_STATE_0 0 -#define POWER_STATE_1 1 -#define POWER_STATE_2 2 -#define POWER_STATE_3 3 #define DOWNLOAD_SAVE_ACTIVATE 0x05 #define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E #define ACTIVATE_DEFERRED_MICROCODE 0x0F @@ -615,7 +606,7 @@ static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr, eui = id_ns->eui64; len = sizeof(id_ns->eui64); - if (ns->ctrl->vs >= NVME_VS(1, 2)) { + if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) { if (bitmap_empty(eui, len * 8)) { eui = id_ns->nguid; len = sizeof(id_ns->nguid); @@ -688,7 +679,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, { int res; - if (ns->ctrl->vs >= NVME_VS(1, 1)) { + if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) { res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len); if (res != -EOPNOTSUPP) return res; @@ -915,7 +906,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, kfree(smart_log); /* Get Features for Temp Threshold */ - res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0, + res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0, &feature_resp); if (res != NVME_SC_SUCCESS) temp_c_thresh = LOG_TEMP_UNKNOWN; @@ -1048,7 +1039,7 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns, if (len < MODE_PAGE_CACHING_LEN) return -EINVAL; - nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0, + nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0, &feature_resp); res = nvme_trans_status_code(hdr, nvme_sc); if (res) @@ -1229,64 +1220,6 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns, /* Start Stop Unit Helper Functions */ -static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, - u8 pc, u8 pcmod, u8 start) -{ - int res; - int nvme_sc; - struct nvme_id_ctrl *id_ctrl; - int lowest_pow_st; /* max npss = lowest power consumption */ - unsigned ps_desired = 0; - - nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl); - res = nvme_trans_status_code(hdr, nvme_sc); - if (res) - return res; - - lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); - kfree(id_ctrl); - - switch (pc) { - case NVME_POWER_STATE_START_VALID: - /* Action unspecified if POWER CONDITION MODIFIER != 0 */ - if (pcmod == 0 && start == 0x1) - ps_desired = POWER_STATE_0; - if (pcmod == 0 && start == 0x0) - ps_desired = lowest_pow_st; - break; - case NVME_POWER_STATE_ACTIVE: - /* Action unspecified if POWER CONDITION MODIFIER != 0 */ - if (pcmod == 0) - ps_desired = POWER_STATE_0; - break; - case NVME_POWER_STATE_IDLE: - /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ - if (pcmod == 0x0) - ps_desired = POWER_STATE_1; - else if (pcmod == 0x1) - ps_desired = POWER_STATE_2; - else if (pcmod == 0x2) - ps_desired = POWER_STATE_3; - break; - case NVME_POWER_STATE_STANDBY: - /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ - if (pcmod == 0x0) - ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2)); - else if (pcmod == 0x1) - ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1)); - break; - case NVME_POWER_STATE_LU_CONTROL: - default: - res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); - break; - } - nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0, - NULL); - return nvme_trans_status_code(hdr, nvme_sc); -} - static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 buffer_id) { @@ -1395,7 +1328,7 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, case MODE_PAGE_CACHING: dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0); nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, - dword11, 0, NULL); + dword11, NULL, 0, NULL); res = nvme_trans_status_code(hdr, nvme_sc); break; case MODE_PAGE_CONTROL: @@ -2235,11 +2168,10 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 *cmd) { - u8 immed, pcmod, pc, no_flush, start; + u8 immed, pcmod, no_flush, start; immed = cmd[1] & 0x01; pcmod = cmd[3] & 0x0f; - pc = (cmd[4] & 0xf0) >> 4; no_flush = cmd[4] & 0x04; start = cmd[4] & 0x01; @@ -2254,8 +2186,8 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, if (res) return res; } - /* Setup the expected power state transition */ - return nvme_trans_power_state(ns, hdr, pc, pcmod, start); + + return 0; } } diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 47c564b5a289..6fe4c48a21e4 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <generated/utsrelease.h> +#include <asm/unaligned.h> #include "nvmet.h" u32 nvmet_get_log_page_len(struct nvme_command *cmd) @@ -29,8 +30,84 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd) return len; } +static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + struct nvmet_ns *ns; + u64 host_reads, host_writes, data_units_read, data_units_written; + + status = NVME_SC_SUCCESS; + ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid); + if (!ns) { + status = NVME_SC_INVALID_NS; + pr_err("nvmet : Counld not find namespace id : %d\n", + le32_to_cpu(req->cmd->get_log_page.nsid)); + goto out; + } + + host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); + data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]); + host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); + data_units_written = part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + + put_unaligned_le64(host_reads, &slog->host_reads[0]); + put_unaligned_le64(data_units_read, &slog->data_units_read[0]); + put_unaligned_le64(host_writes, &slog->host_writes[0]); + put_unaligned_le64(data_units_written, &slog->data_units_written[0]); + nvmet_put_namespace(ns); +out: + return status; +} + +static u16 nvmet_get_smart_log_all(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + u64 host_reads = 0, host_writes = 0; + u64 data_units_read = 0, data_units_written = 0; + struct nvmet_ns *ns; + struct nvmet_ctrl *ctrl; + + status = NVME_SC_SUCCESS; + ctrl = req->sq->ctrl; + + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) { + host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); + data_units_read += + part_stat_read(ns->bdev->bd_part, sectors[READ]); + host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); + data_units_written += + part_stat_read(ns->bdev->bd_part, sectors[WRITE]); + + } + rcu_read_unlock(); + + put_unaligned_le64(host_reads, &slog->host_reads[0]); + put_unaligned_le64(data_units_read, &slog->data_units_read[0]); + put_unaligned_le64(host_writes, &slog->host_writes[0]); + put_unaligned_le64(data_units_written, &slog->data_units_written[0]); + + return status; +} + +static u16 nvmet_get_smart_log(struct nvmet_req *req, + struct nvme_smart_log *slog) +{ + u16 status; + + WARN_ON(req == NULL || slog == NULL); + if (req->cmd->get_log_page.nsid == 0xFFFFFFFF) + status = nvmet_get_smart_log_all(req, slog); + else + status = nvmet_get_smart_log_nsid(req, slog); + return status; +} + static void nvmet_execute_get_log_page(struct nvmet_req *req) { + struct nvme_smart_log *smart_log; size_t data_len = nvmet_get_log_page_len(req->cmd); void *buf; u16 status = 0; @@ -59,6 +136,16 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) * available (e.g. units or commands read/written) those aren't * persistent over power loss. */ + if (data_len != sizeof(*smart_log)) { + status = NVME_SC_INTERNAL; + goto err; + } + smart_log = buf; + status = nvmet_get_smart_log(req, smart_log); + if (status) { + memset(buf, '\0', data_len); + goto err; + } break; case 0x03: /* @@ -73,6 +160,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) status = nvmet_copy_to_sgl(req, 0, buf, data_len); +err: kfree(buf); out: nvmet_req_complete(req, status); @@ -111,7 +199,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) */ /* we support multiple ports and multiples hosts: */ - id->mic = (1 << 0) | (1 << 1); + id->cmic = (1 << 0) | (1 << 1); /* no limit on data transfer sizes for now */ id->mdts = 0; @@ -423,13 +511,13 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) case nvme_admin_identify: req->data_len = 4096; switch (le32_to_cpu(cmd->identify.cns)) { - case 0x00: + case NVME_ID_CNS_NS: req->execute = nvmet_execute_identify_ns; return 0; - case 0x01: + case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_ctrl; return 0; - case 0x02: + case NVME_ID_CNS_NS_ACTIVE_LIST: req->execute = nvmet_execute_identify_nslist; return 0; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6559d5afa7bf..b4cacb6f0258 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -882,7 +882,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, if (!subsys) return NULL; - subsys->ver = (1 << 16) | (2 << 8) | 1; /* NVMe 1.2.1 */ + subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */ switch (type) { case NVME_NQN_NVME: diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 6f65646e89cf..12f39eea569f 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -54,7 +54,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, /* we support only dynamic controllers */ e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH); - e->nqntype = type; + e->subtype = type; memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); @@ -187,7 +187,7 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) case nvme_admin_identify: req->data_len = 4096; switch (le32_to_cpu(cmd->identify.cns)) { - case 0x01: + case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_disc_ctrl; return 0; diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 2cd069b691ae..4a96c2049b7b 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -58,6 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; + op_flags = WRITE_ODIRECT; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { @@ -205,7 +206,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) return 0; case nvme_cmd_dsm: req->execute = nvmet_execute_dsm; - req->data_len = le32_to_cpu(cmd->dsm.nr) * + req->data_len = le32_to_cpu(cmd->dsm.nr + 1) * sizeof(struct nvme_dsm_range); return 0; default: diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 395e60dad835..d5df77d686b2 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -273,7 +273,6 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, static struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_loop_init_request, .init_hctx = nvme_loop_init_hctx, .timeout = nvme_loop_timeout, @@ -282,7 +281,6 @@ static struct blk_mq_ops nvme_loop_mq_ops = { static struct blk_mq_ops nvme_loop_admin_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, - .map_queue = blk_mq_map_queue, .init_request = nvme_loop_init_admin_request, .init_hctx = nvme_loop_init_admin_hctx, .timeout = nvme_loop_timeout, diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1cbe6e053b5b..f8d23999e0f2 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -848,7 +848,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->device = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->device); + ndev->pd = ib_alloc_pd(ndev->device, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; |