summaryrefslogtreecommitdiff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r--drivers/nvme/host/Makefile2
-rw-r--r--drivers/nvme/host/auth.c6
-rw-r--r--drivers/nvme/host/constants.c4
-rw-r--r--drivers/nvme/host/core.c915
-rw-r--r--drivers/nvme/host/fabrics.c241
-rw-r--r--drivers/nvme/host/fabrics.h21
-rw-r--r--drivers/nvme/host/fault_inject.c2
-rw-r--r--drivers/nvme/host/fc.c45
-rw-r--r--drivers/nvme/host/ioctl.c142
-rw-r--r--drivers/nvme/host/multipath.c16
-rw-r--r--drivers/nvme/host/nvme.h23
-rw-r--r--drivers/nvme/host/pci.c39
-rw-r--r--drivers/nvme/host/pr.c315
-rw-r--r--drivers/nvme/host/rdma.c81
-rw-r--r--drivers/nvme/host/sysfs.c668
-rw-r--r--drivers/nvme/host/tcp.c141
-rw-r--r--drivers/nvme/host/zns.c9
17 files changed, 1471 insertions, 1199 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index e27202d22c7d..c7c3cf202d12 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
-nvme-core-y += core.o ioctl.o
+nvme-core-y += core.o ioctl.o sysfs.o pr.o
nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index ea16a0aba679..daf5d144a8ea 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -30,18 +30,18 @@ struct nvme_dhchap_queue_context {
u32 s2;
u16 transaction;
u8 status;
+ u8 dhgroup_id;
u8 hash_id;
size_t hash_len;
- u8 dhgroup_id;
u8 c1[64];
u8 c2[64];
u8 response[64];
u8 *host_response;
u8 *ctrl_key;
- int ctrl_key_len;
u8 *host_key;
- int host_key_len;
u8 *sess_key;
+ int ctrl_key_len;
+ int host_key_len;
int sess_key_len;
};
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index bc523ca02254..20f46c230885 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -12,7 +12,7 @@ static const char * const nvme_ops[] = {
[nvme_cmd_read] = "Read",
[nvme_cmd_write_uncor] = "Write Uncorrectable",
[nvme_cmd_compare] = "Compare",
- [nvme_cmd_write_zeroes] = "Write Zeros",
+ [nvme_cmd_write_zeroes] = "Write Zeroes",
[nvme_cmd_dsm] = "Dataset Management",
[nvme_cmd_verify] = "Verify",
[nvme_cmd_resv_register] = "Reservation Register",
@@ -21,7 +21,7 @@ static const char * const nvme_ops[] = {
[nvme_cmd_resv_release] = "Reservation Release",
[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
- [nvme_cmd_zone_append] = "Zone Management Append",
+ [nvme_cmd_zone_append] = "Zone Append",
};
static const char * const nvme_admin_ops[] = {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1f0cbb77b249..37b6fa746662 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -237,7 +237,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
-static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
{
/*
* Keep a reference until nvme_do_delete_ctrl() complete,
@@ -279,7 +279,7 @@ static blk_status_t nvme_error_status(u16 status)
case NVME_SC_INVALID_PI:
return BLK_STS_PROTECTION;
case NVME_SC_RESERVATION_CONFLICT:
- return BLK_STS_NEXUS;
+ return BLK_STS_RESV_CONFLICT;
case NVME_SC_HOST_PATH_ERROR:
return BLK_STS_TRANSPORT;
case NVME_SC_ZONE_TOO_MANY_ACTIVE:
@@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
nvme_cleanup_cmd(req);
- if (ctrl->kas)
+ /*
+ * Completions of long-running commands should not be able to
+ * defer sending of periodic keep alives, since the controller
+ * may have completed processing such commands a long time ago
+ * (arbitrarily close to command submission time).
+ * req->deadline - req->timeout is the command submission time
+ * in jiffies.
+ */
+ if (ctrl->kas &&
+ req->deadline - req->timeout >= ctrl->ka_last_check_time)
ctrl->comp_seen = true;
switch (nvme_decide_disposition(req)) {
@@ -1115,7 +1124,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status)
{
if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
@@ -1125,13 +1134,18 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC) {
- dev_info(ctrl->device,
+ if (!test_and_set_bit(NVME_CTRL_DIRTY_CAPABILITY,
+ &ctrl->flags)) {
+ dev_info(ctrl->device,
"controller capabilities changed, reset may be required to take effect.\n");
+ }
}
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
+ if (ns)
+ return;
switch (cmd->common.opcode) {
case nvme_admin_set_features:
@@ -1161,9 +1175,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
* The host should send Keep Alive commands at half of the Keep Alive Timeout
* accounting for transport roundtrip times [..].
*/
+static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
+{
+ unsigned long delay = ctrl->kato * HZ / 2;
+
+ /*
+ * When using Traffic Based Keep Alive, we need to run
+ * nvme_keep_alive_work at twice the normal frequency, as one
+ * command completion can postpone sending a keep alive command
+ * by up to twice the delay between runs.
+ */
+ if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
+ delay /= 2;
+ return delay;
+}
+
static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
{
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work,
+ nvme_keep_alive_work_period(ctrl));
}
static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1172,6 +1202,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long flags;
bool startka = false;
+ unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
+ unsigned long delay = nvme_keep_alive_work_period(ctrl);
+
+ /*
+ * Subtract off the keepalive RTT so nvme_keep_alive_work runs
+ * at the desired frequency.
+ */
+ if (rtt <= delay) {
+ delay -= rtt;
+ } else {
+ dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n",
+ jiffies_to_msecs(rtt));
+ delay = 0;
+ }
blk_mq_free_request(rq);
@@ -1182,6 +1226,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
return RQ_END_IO_NONE;
}
+ ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false;
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->state == NVME_CTRL_LIVE ||
@@ -1189,7 +1234,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- nvme_queue_keep_alive_work(ctrl);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
return RQ_END_IO_NONE;
}
@@ -1200,6 +1245,8 @@ static void nvme_keep_alive_work(struct work_struct *work)
bool comp_seen = ctrl->comp_seen;
struct request *rq;
+ ctrl->ka_last_check_time = jiffies;
+
if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
@@ -1591,12 +1638,12 @@ static void nvme_ns_release(struct nvme_ns *ns)
nvme_put_ns(ns);
}
-static int nvme_open(struct block_device *bdev, fmode_t mode)
+static int nvme_open(struct gendisk *disk, blk_mode_t mode)
{
- return nvme_ns_open(bdev->bd_disk->private_data);
+ return nvme_ns_open(disk->private_data);
}
-static void nvme_release(struct gendisk *disk, fmode_t mode)
+static void nvme_release(struct gendisk *disk)
{
nvme_ns_release(disk->private_data);
}
@@ -1835,7 +1882,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
- unsigned short bs = 1 << ns->lba_shift;
+ u32 bs = 1U << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt = 0;
/*
@@ -2061,153 +2108,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
}
}
-static char nvme_pr_type(enum pr_type type)
-{
- switch (type) {
- case PR_WRITE_EXCLUSIVE:
- return 1;
- case PR_EXCLUSIVE_ACCESS:
- return 2;
- case PR_WRITE_EXCLUSIVE_REG_ONLY:
- return 3;
- case PR_EXCLUSIVE_ACCESS_REG_ONLY:
- return 4;
- case PR_WRITE_EXCLUSIVE_ALL_REGS:
- return 5;
- case PR_EXCLUSIVE_ACCESS_ALL_REGS:
- return 6;
- default:
- return 0;
- }
-}
-
-static int nvme_send_ns_head_pr_command(struct block_device *bdev,
- struct nvme_command *c, u8 data[16])
-{
- struct nvme_ns_head *head = bdev->bd_disk->private_data;
- int srcu_idx = srcu_read_lock(&head->srcu);
- struct nvme_ns *ns = nvme_find_path(head);
- int ret = -EWOULDBLOCK;
-
- if (ns) {
- c->common.nsid = cpu_to_le32(ns->head->ns_id);
- ret = nvme_submit_sync_cmd(ns->queue, c, data, 16);
- }
- srcu_read_unlock(&head->srcu, srcu_idx);
- return ret;
-}
-
-static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
- u8 data[16])
-{
- c->common.nsid = cpu_to_le32(ns->head->ns_id);
- return nvme_submit_sync_cmd(ns->queue, c, data, 16);
-}
-
-static int nvme_sc_to_pr_err(int nvme_sc)
-{
- if (nvme_is_path_error(nvme_sc))
- return PR_STS_PATH_FAILED;
-
- switch (nvme_sc) {
- case NVME_SC_SUCCESS:
- return PR_STS_SUCCESS;
- case NVME_SC_RESERVATION_CONFLICT:
- return PR_STS_RESERVATION_CONFLICT;
- case NVME_SC_ONCS_NOT_SUPPORTED:
- return -EOPNOTSUPP;
- case NVME_SC_BAD_ATTRIBUTES:
- case NVME_SC_INVALID_OPCODE:
- case NVME_SC_INVALID_FIELD:
- case NVME_SC_INVALID_NS:
- return -EINVAL;
- default:
- return PR_STS_IOERR;
- }
-}
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
- u64 key, u64 sa_key, u8 op)
-{
- struct nvme_command c = { };
- u8 data[16] = { 0, };
- int ret;
-
- put_unaligned_le64(key, &data[0]);
- put_unaligned_le64(sa_key, &data[8]);
-
- c.common.opcode = op;
- c.common.cdw10 = cpu_to_le32(cdw10);
-
- if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
- bdev->bd_disk->fops == &nvme_ns_head_ops)
- ret = nvme_send_ns_head_pr_command(bdev, &c, data);
- else
- ret = nvme_send_ns_pr_command(bdev->bd_disk->private_data, &c,
- data);
- if (ret < 0)
- return ret;
-
- return nvme_sc_to_pr_err(ret);
-}
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
- u64 new, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = old ? 2 : 0;
- cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
- cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
- enum pr_type type, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = nvme_pr_type(type) << 8;
- cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
- enum pr_type type, bool abort)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
-
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
- u32 cdw10 = 1 | (key ? 0 : 1 << 3);
-
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3);
-
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-const struct pr_ops nvme_pr_ops = {
- .pr_register = nvme_pr_register,
- .pr_reserve = nvme_pr_reserve,
- .pr_release = nvme_pr_release,
- .pr_preempt = nvme_pr_preempt,
- .pr_clear = nvme_pr_clear,
-};
-
#ifdef CONFIG_BLK_SED_OPAL
static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send)
@@ -2256,7 +2156,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector,
#define nvme_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
-static const struct block_device_operations nvme_bdev_ops = {
+const struct block_device_operations nvme_bdev_ops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
@@ -2791,75 +2691,6 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn)
return NULL;
}
-#define SUBSYS_ATTR_RO(_name, _mode, _show) \
- struct device_attribute subsys_attr_##_name = \
- __ATTR(_name, _mode, _show, NULL)
-
-static ssize_t nvme_subsys_show_nqn(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_subsystem *subsys =
- container_of(dev, struct nvme_subsystem, dev);
-
- return sysfs_emit(buf, "%s\n", subsys->subnqn);
-}
-static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
-
-static ssize_t nvme_subsys_show_type(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_subsystem *subsys =
- container_of(dev, struct nvme_subsystem, dev);
-
- switch (subsys->subtype) {
- case NVME_NQN_DISC:
- return sysfs_emit(buf, "discovery\n");
- case NVME_NQN_NVME:
- return sysfs_emit(buf, "nvm\n");
- default:
- return sysfs_emit(buf, "reserved\n");
- }
-}
-static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type);
-
-#define nvme_subsys_show_str_function(field) \
-static ssize_t subsys_##field##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct nvme_subsystem *subsys = \
- container_of(dev, struct nvme_subsystem, dev); \
- return sysfs_emit(buf, "%.*s\n", \
- (int)sizeof(subsys->field), subsys->field); \
-} \
-static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show);
-
-nvme_subsys_show_str_function(model);
-nvme_subsys_show_str_function(serial);
-nvme_subsys_show_str_function(firmware_rev);
-
-static struct attribute *nvme_subsys_attrs[] = {
- &subsys_attr_model.attr,
- &subsys_attr_serial.attr,
- &subsys_attr_firmware_rev.attr,
- &subsys_attr_subsysnqn.attr,
- &subsys_attr_subsystype.attr,
-#ifdef CONFIG_NVME_MULTIPATH
- &subsys_attr_iopolicy.attr,
-#endif
- NULL,
-};
-
-static const struct attribute_group nvme_subsys_attrs_group = {
- .attrs = nvme_subsys_attrs,
-};
-
-static const struct attribute_group *nvme_subsys_attrs_groups[] = {
- &nvme_subsys_attrs_group,
- NULL,
-};
-
static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl)
{
return ctrl->opts && ctrl->opts->discovery_nqn;
@@ -3064,7 +2895,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
ctrl->max_zeroes_sectors = 0;
if (ctrl->subsys->subtype != NVME_NQN_NVME ||
- nvme_ctrl_limited_cns(ctrl))
+ nvme_ctrl_limited_cns(ctrl) ||
+ test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
return 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -3086,6 +2918,8 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
free_data:
+ if (ret > 0)
+ set_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags);
kfree(id);
return ret;
}
@@ -3346,6 +3180,7 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
return ret;
}
+ clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags);
ctrl->identified = true;
return 0;
@@ -3393,586 +3228,6 @@ static const struct file_operations nvme_dev_fops = {
.uring_cmd = nvme_dev_uring_cmd,
};
-static ssize_t nvme_sysfs_reset(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- int ret;
-
- ret = nvme_reset_ctrl_sync(ctrl);
- if (ret < 0)
- return ret;
- return count;
-}
-static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
-
-static ssize_t nvme_sysfs_rescan(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- nvme_queue_scan(ctrl);
- return count;
-}
-static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
-
-static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
-{
- struct gendisk *disk = dev_to_disk(dev);
-
- if (disk->fops == &nvme_bdev_ops)
- return nvme_get_ns_from_dev(dev)->head;
- else
- return disk->private_data;
-}
-
-static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ns_head *head = dev_to_ns_head(dev);
- struct nvme_ns_ids *ids = &head->ids;
- struct nvme_subsystem *subsys = head->subsys;
- int serial_len = sizeof(subsys->serial);
- int model_len = sizeof(subsys->model);
-
- if (!uuid_is_null(&ids->uuid))
- return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid);
-
- if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
- return sysfs_emit(buf, "eui.%16phN\n", ids->nguid);
-
- if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
- return sysfs_emit(buf, "eui.%8phN\n", ids->eui64);
-
- while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' ||
- subsys->serial[serial_len - 1] == '\0'))
- serial_len--;
- while (model_len > 0 && (subsys->model[model_len - 1] == ' ' ||
- subsys->model[model_len - 1] == '\0'))
- model_len--;
-
- return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
- serial_len, subsys->serial, model_len, subsys->model,
- head->ns_id);
-}
-static DEVICE_ATTR_RO(wwid);
-
-static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
-}
-static DEVICE_ATTR_RO(nguid);
-
-static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
-
- /* For backward compatibility expose the NGUID to userspace if
- * we have no UUID set
- */
- if (uuid_is_null(&ids->uuid)) {
- dev_warn_ratelimited(dev,
- "No UUID available providing old NGUID\n");
- return sysfs_emit(buf, "%pU\n", ids->nguid);
- }
- return sysfs_emit(buf, "%pU\n", &ids->uuid);
-}
-static DEVICE_ATTR_RO(uuid);
-
-static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
-}
-static DEVICE_ATTR_RO(eui);
-
-static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
-}
-static DEVICE_ATTR_RO(nsid);
-
-static struct attribute *nvme_ns_id_attrs[] = {
- &dev_attr_wwid.attr,
- &dev_attr_uuid.attr,
- &dev_attr_nguid.attr,
- &dev_attr_eui.attr,
- &dev_attr_nsid.attr,
-#ifdef CONFIG_NVME_MULTIPATH
- &dev_attr_ana_grpid.attr,
- &dev_attr_ana_state.attr,
-#endif
- NULL,
-};
-
-static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
- struct attribute *a, int n)
-{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
-
- if (a == &dev_attr_uuid.attr) {
- if (uuid_is_null(&ids->uuid) &&
- !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
- return 0;
- }
- if (a == &dev_attr_nguid.attr) {
- if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
- return 0;
- }
- if (a == &dev_attr_eui.attr) {
- if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
- return 0;
- }
-#ifdef CONFIG_NVME_MULTIPATH
- if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
- if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
- return 0;
- if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
- return 0;
- }
-#endif
- return a->mode;
-}
-
-static const struct attribute_group nvme_ns_id_attr_group = {
- .attrs = nvme_ns_id_attrs,
- .is_visible = nvme_ns_id_attrs_are_visible,
-};
-
-const struct attribute_group *nvme_ns_id_attr_groups[] = {
- &nvme_ns_id_attr_group,
- NULL,
-};
-
-#define nvme_show_str_function(field) \
-static ssize_t field##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
- return sysfs_emit(buf, "%.*s\n", \
- (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \
-} \
-static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
-
-nvme_show_str_function(model);
-nvme_show_str_function(serial);
-nvme_show_str_function(firmware_rev);
-
-#define nvme_show_int_function(field) \
-static ssize_t field##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
- return sysfs_emit(buf, "%d\n", ctrl->field); \
-} \
-static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
-
-nvme_show_int_function(cntlid);
-nvme_show_int_function(numa_node);
-nvme_show_int_function(queue_count);
-nvme_show_int_function(sqsize);
-nvme_show_int_function(kato);
-
-static ssize_t nvme_sysfs_delete(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags))
- return -EBUSY;
-
- if (device_remove_file_self(dev, attr))
- nvme_delete_ctrl_sync(ctrl);
- return count;
-}
-static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
-
-static ssize_t nvme_sysfs_show_transport(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%s\n", ctrl->ops->name);
-}
-static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
-
-static ssize_t nvme_sysfs_show_state(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- static const char *const state_name[] = {
- [NVME_CTRL_NEW] = "new",
- [NVME_CTRL_LIVE] = "live",
- [NVME_CTRL_RESETTING] = "resetting",
- [NVME_CTRL_CONNECTING] = "connecting",
- [NVME_CTRL_DELETING] = "deleting",
- [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)",
- [NVME_CTRL_DEAD] = "dead",
- };
-
- if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
- state_name[ctrl->state])
- return sysfs_emit(buf, "%s\n", state_name[ctrl->state]);
-
- return sysfs_emit(buf, "unknown state\n");
-}
-
-static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
-
-static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%s\n", ctrl->subsys->subnqn);
-}
-static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
-
-static ssize_t nvme_sysfs_show_hostnqn(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%s\n", ctrl->opts->host->nqn);
-}
-static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL);
-
-static ssize_t nvme_sysfs_show_hostid(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- return sysfs_emit(buf, "%pU\n", &ctrl->opts->host->id);
-}
-static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL);
-
-static ssize_t nvme_sysfs_show_address(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE);
-}
-static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
-
-static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
-
- if (ctrl->opts->max_reconnects == -1)
- return sysfs_emit(buf, "off\n");
- return sysfs_emit(buf, "%d\n",
- opts->max_reconnects * opts->reconnect_delay);
-}
-
-static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
- int ctrl_loss_tmo, err;
-
- err = kstrtoint(buf, 10, &ctrl_loss_tmo);
- if (err)
- return -EINVAL;
-
- if (ctrl_loss_tmo < 0)
- opts->max_reconnects = -1;
- else
- opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
- opts->reconnect_delay);
- return count;
-}
-static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR,
- nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store);
-
-static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (ctrl->opts->reconnect_delay == -1)
- return sysfs_emit(buf, "off\n");
- return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay);
-}
-
-static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- unsigned int v;
- int err;
-
- err = kstrtou32(buf, 10, &v);
- if (err)
- return err;
-
- ctrl->opts->reconnect_delay = v;
- return count;
-}
-static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
- nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
-
-static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (ctrl->opts->fast_io_fail_tmo == -1)
- return sysfs_emit(buf, "off\n");
- return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo);
-}
-
-static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
- int fast_io_fail_tmo, err;
-
- err = kstrtoint(buf, 10, &fast_io_fail_tmo);
- if (err)
- return -EINVAL;
-
- if (fast_io_fail_tmo < 0)
- opts->fast_io_fail_tmo = -1;
- else
- opts->fast_io_fail_tmo = fast_io_fail_tmo;
- return count;
-}
-static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
- nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store);
-
-static ssize_t cntrltype_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- static const char * const type[] = {
- [NVME_CTRL_IO] = "io\n",
- [NVME_CTRL_DISC] = "discovery\n",
- [NVME_CTRL_ADMIN] = "admin\n",
- };
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype])
- return sysfs_emit(buf, "reserved\n");
-
- return sysfs_emit(buf, type[ctrl->cntrltype]);
-}
-static DEVICE_ATTR_RO(cntrltype);
-
-static ssize_t dctype_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- static const char * const type[] = {
- [NVME_DCTYPE_NOT_REPORTED] = "none\n",
- [NVME_DCTYPE_DDC] = "ddc\n",
- [NVME_DCTYPE_CDC] = "cdc\n",
- };
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype])
- return sysfs_emit(buf, "reserved\n");
-
- return sysfs_emit(buf, type[ctrl->dctype]);
-}
-static DEVICE_ATTR_RO(dctype);
-
-#ifdef CONFIG_NVME_AUTH
-static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
-
- if (!opts->dhchap_secret)
- return sysfs_emit(buf, "none\n");
- return sysfs_emit(buf, "%s\n", opts->dhchap_secret);
-}
-
-static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
- char *dhchap_secret;
-
- if (!ctrl->opts->dhchap_secret)
- return -EINVAL;
- if (count < 7)
- return -EINVAL;
- if (memcmp(buf, "DHHC-1:", 7))
- return -EINVAL;
-
- dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
- if (!dhchap_secret)
- return -ENOMEM;
- memcpy(dhchap_secret, buf, count);
- nvme_auth_stop(ctrl);
- if (strcmp(dhchap_secret, opts->dhchap_secret)) {
- struct nvme_dhchap_key *key, *host_key;
- int ret;
-
- ret = nvme_auth_generate_key(dhchap_secret, &key);
- if (ret)
- return ret;
- kfree(opts->dhchap_secret);
- opts->dhchap_secret = dhchap_secret;
- host_key = ctrl->host_key;
- mutex_lock(&ctrl->dhchap_auth_mutex);
- ctrl->host_key = key;
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- nvme_auth_free_key(host_key);
- }
- /* Start re-authentication */
- dev_info(ctrl->device, "re-authenticating controller\n");
- queue_work(nvme_wq, &ctrl->dhchap_auth_work);
-
- return count;
-}
-static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR,
- nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store);
-
-static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
-
- if (!opts->dhchap_ctrl_secret)
- return sysfs_emit(buf, "none\n");
- return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret);
-}
-
-static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
- struct nvmf_ctrl_options *opts = ctrl->opts;
- char *dhchap_secret;
-
- if (!ctrl->opts->dhchap_ctrl_secret)
- return -EINVAL;
- if (count < 7)
- return -EINVAL;
- if (memcmp(buf, "DHHC-1:", 7))
- return -EINVAL;
-
- dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
- if (!dhchap_secret)
- return -ENOMEM;
- memcpy(dhchap_secret, buf, count);
- nvme_auth_stop(ctrl);
- if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
- struct nvme_dhchap_key *key, *ctrl_key;
- int ret;
-
- ret = nvme_auth_generate_key(dhchap_secret, &key);
- if (ret)
- return ret;
- kfree(opts->dhchap_ctrl_secret);
- opts->dhchap_ctrl_secret = dhchap_secret;
- ctrl_key = ctrl->ctrl_key;
- mutex_lock(&ctrl->dhchap_auth_mutex);
- ctrl->ctrl_key = key;
- mutex_unlock(&ctrl->dhchap_auth_mutex);
- nvme_auth_free_key(ctrl_key);
- }
- /* Start re-authentication */
- dev_info(ctrl->device, "re-authenticating controller\n");
- queue_work(nvme_wq, &ctrl->dhchap_auth_work);
-
- return count;
-}
-static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
- nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
-#endif
-
-static struct attribute *nvme_dev_attrs[] = {
- &dev_attr_reset_controller.attr,
- &dev_attr_rescan_controller.attr,
- &dev_attr_model.attr,
- &dev_attr_serial.attr,
- &dev_attr_firmware_rev.attr,
- &dev_attr_cntlid.attr,
- &dev_attr_delete_controller.attr,
- &dev_attr_transport.attr,
- &dev_attr_subsysnqn.attr,
- &dev_attr_address.attr,
- &dev_attr_state.attr,
- &dev_attr_numa_node.attr,
- &dev_attr_queue_count.attr,
- &dev_attr_sqsize.attr,
- &dev_attr_hostnqn.attr,
- &dev_attr_hostid.attr,
- &dev_attr_ctrl_loss_tmo.attr,
- &dev_attr_reconnect_delay.attr,
- &dev_attr_fast_io_fail_tmo.attr,
- &dev_attr_kato.attr,
- &dev_attr_cntrltype.attr,
- &dev_attr_dctype.attr,
-#ifdef CONFIG_NVME_AUTH
- &dev_attr_dhchap_secret.attr,
- &dev_attr_dhchap_ctrl_secret.attr,
-#endif
- NULL
-};
-
-static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
- struct attribute *a, int n)
-{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
- if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
- return 0;
- if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
- return 0;
- if (a == &dev_attr_hostnqn.attr && !ctrl->opts)
- return 0;
- if (a == &dev_attr_hostid.attr && !ctrl->opts)
- return 0;
- if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts)
- return 0;
- if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
- return 0;
- if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts)
- return 0;
-#ifdef CONFIG_NVME_AUTH
- if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts)
- return 0;
- if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
- return 0;
-#endif
-
- return a->mode;
-}
-
-const struct attribute_group nvme_dev_attrs_group = {
- .attrs = nvme_dev_attrs,
- .is_visible = nvme_dev_attrs_are_visible,
-};
-EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
-
-static const struct attribute_group *nvme_dev_attr_groups[] = {
- &nvme_dev_attrs_group,
- NULL,
-};
-
static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid)
{
@@ -4176,10 +3431,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
if (ret) {
- dev_err(ctrl->device,
- "globally duplicate IDs for nsid %d\n", info->nsid);
+ /*
+ * We've found two different namespaces on two different
+ * subsystems that report the same ID. This is pretty nasty
+ * for anything that actually requires unique device
+ * identification. In the kernel we need this for multipathing,
+ * and in user space the /dev/disk/by-id/ links rely on it.
+ *
+ * If the device also claims to be multi-path capable back off
+ * here now and refuse the probe the second device as this is a
+ * recipe for data corruption. If not this is probably a
+ * cheap consumer device if on the PCIe bus, so let the user
+ * proceed and use the shiny toy, but warn that with changing
+ * probing order (which due to our async probing could just be
+ * device taking longer to startup) the other device could show
+ * up at any time.
+ */
nvme_print_device_info(ctrl);
- return ret;
+ if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
+ ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
+ info->is_shared)) {
+ dev_err(ctrl->device,
+ "ignoring nsid %d because of duplicate IDs\n",
+ info->nsid);
+ return ret;
+ }
+
+ dev_err(ctrl->device,
+ "clearing duplicate IDs for nsid %d\n", info->nsid);
+ dev_err(ctrl->device,
+ "use of /dev/disk/by-id/ may cause data corruption\n");
+ memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
+ memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
+ memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
+ ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
}
mutex_lock(&ctrl->subsys->lock);
@@ -4212,7 +3497,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
goto out_put_ns_head;
}
- if (!multipath && !list_empty(&head->list)) {
+ if (!multipath) {
dev_warn(ctrl->device,
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
@@ -4313,7 +3598,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
* instance as shared namespaces will show up as multiple block
* devices.
*/
- if (ns->head->disk) {
+ if (nvme_ns_head_multipath(ns->head)) {
sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
ctrl->instance, ns->head->instance);
disk->flags |= GENHD_FL_HIDDEN;
@@ -5199,6 +4484,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
return 0;
out_free_cdev:
+ nvme_fault_inject_fini(&ctrl->fault_inject);
+ dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
out_free_name:
nvme_put_ctrl(ctrl);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 0069ebff85df..8175d49f2909 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -21,35 +21,60 @@ static DEFINE_MUTEX(nvmf_hosts_mutex);
static struct nvmf_host *nvmf_default_host;
-static struct nvmf_host *__nvmf_host_find(const char *hostnqn)
+static struct nvmf_host *nvmf_host_alloc(const char *hostnqn, uuid_t *id)
{
struct nvmf_host *host;
- list_for_each_entry(host, &nvmf_hosts, list) {
- if (!strcmp(host->nqn, hostnqn))
- return host;
- }
+ host = kmalloc(sizeof(*host), GFP_KERNEL);
+ if (!host)
+ return NULL;
- return NULL;
+ kref_init(&host->ref);
+ uuid_copy(&host->id, id);
+ strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+
+ return host;
}
-static struct nvmf_host *nvmf_host_add(const char *hostnqn)
+static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t *id)
{
struct nvmf_host *host;
mutex_lock(&nvmf_hosts_mutex);
- host = __nvmf_host_find(hostnqn);
- if (host) {
- kref_get(&host->ref);
- goto out_unlock;
+
+ /*
+ * We have defined a host as how it is perceived by the target.
+ * Therefore, we don't allow different Host NQNs with the same Host ID.
+ * Similarly, we do not allow the usage of the same Host NQN with
+ * different Host IDs. This'll maintain unambiguous host identification.
+ */
+ list_for_each_entry(host, &nvmf_hosts, list) {
+ bool same_hostnqn = !strcmp(host->nqn, hostnqn);
+ bool same_hostid = uuid_equal(&host->id, id);
+
+ if (same_hostnqn && same_hostid) {
+ kref_get(&host->ref);
+ goto out_unlock;
+ }
+ if (same_hostnqn) {
+ pr_err("found same hostnqn %s but different hostid %pUb\n",
+ hostnqn, id);
+ host = ERR_PTR(-EINVAL);
+ goto out_unlock;
+ }
+ if (same_hostid) {
+ pr_err("found same hostid %pUb but different hostnqn %s\n",
+ id, hostnqn);
+ host = ERR_PTR(-EINVAL);
+ goto out_unlock;
+ }
}
- host = kmalloc(sizeof(*host), GFP_KERNEL);
- if (!host)
+ host = nvmf_host_alloc(hostnqn, id);
+ if (!host) {
+ host = ERR_PTR(-ENOMEM);
goto out_unlock;
-
- kref_init(&host->ref);
- strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+ }
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -60,16 +85,17 @@ out_unlock:
static struct nvmf_host *nvmf_host_default(void)
{
struct nvmf_host *host;
+ char nqn[NVMF_NQN_SIZE];
+ uuid_t id;
- host = kmalloc(sizeof(*host), GFP_KERNEL);
+ uuid_gen(&id);
+ snprintf(nqn, NVMF_NQN_SIZE,
+ "nqn.2014-08.org.nvmexpress:uuid:%pUb", &id);
+
+ host = nvmf_host_alloc(nqn, &id);
if (!host)
return NULL;
- kref_init(&host->ref);
- uuid_gen(&host->id);
- snprintf(host->nqn, NVMF_NQN_SIZE,
- "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
-
mutex_lock(&nvmf_hosts_mutex);
list_add_tail(&host->list, &nvmf_hosts);
mutex_unlock(&nvmf_hosts_mutex);
@@ -349,6 +375,45 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
}
}
+static struct nvmf_connect_data *nvmf_connect_data_prep(struct nvme_ctrl *ctrl,
+ u16 cntlid)
+{
+ struct nvmf_connect_data *data;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+
+ uuid_copy(&data->hostid, &ctrl->opts->host->id);
+ data->cntlid = cpu_to_le16(cntlid);
+ strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
+ strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
+
+ return data;
+}
+
+static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid,
+ struct nvme_command *cmd)
+{
+ cmd->connect.opcode = nvme_fabrics_command;
+ cmd->connect.fctype = nvme_fabrics_type_connect;
+ cmd->connect.qid = cpu_to_le16(qid);
+
+ if (qid) {
+ cmd->connect.sqsize = cpu_to_le16(ctrl->sqsize);
+ } else {
+ cmd->connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
+
+ /*
+ * set keep-alive timeout in seconds granularity (ms * 1000)
+ */
+ cmd->connect.kato = cpu_to_le32(ctrl->kato * 1000);
+ }
+
+ if (ctrl->opts->disable_sqflow)
+ cmd->connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
+}
+
/**
* nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect"
* API function.
@@ -377,28 +442,12 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
int ret;
u32 result;
- cmd.connect.opcode = nvme_fabrics_command;
- cmd.connect.fctype = nvme_fabrics_type_connect;
- cmd.connect.qid = 0;
- cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
-
- /*
- * Set keep-alive timeout in seconds granularity (ms * 1000)
- */
- cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000);
-
- if (ctrl->opts->disable_sqflow)
- cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
+ nvmf_connect_cmd_prep(ctrl, 0, &cmd);
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nvmf_connect_data_prep(ctrl, 0xffff);
if (!data)
return -ENOMEM;
- uuid_copy(&data->hostid, &ctrl->opts->host->id);
- data->cntlid = cpu_to_le16(0xffff);
- strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
- strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
-
ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
data, sizeof(*data), NVME_QID_ANY, 1,
BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
@@ -468,23 +517,12 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
int ret;
u32 result;
- cmd.connect.opcode = nvme_fabrics_command;
- cmd.connect.fctype = nvme_fabrics_type_connect;
- cmd.connect.qid = cpu_to_le16(qid);
- cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+ nvmf_connect_cmd_prep(ctrl, qid, &cmd);
- if (ctrl->opts->disable_sqflow)
- cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = nvmf_connect_data_prep(ctrl, ctrl->cntlid);
if (!data)
return -ENOMEM;
- uuid_copy(&data->hostid, &ctrl->opts->host->id);
- data->cntlid = cpu_to_le16(ctrl->cntlid);
- strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
- strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
-
ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
data, sizeof(*data), qid, 1,
BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
@@ -621,6 +659,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
size_t nqnlen = 0;
int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
uuid_t hostid;
+ char hostnqn[NVMF_NQN_SIZE];
/* Set defaults */
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -637,7 +676,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
if (!options)
return -ENOMEM;
- uuid_gen(&hostid);
+ /* use default host if not given by user space */
+ uuid_copy(&hostid, &nvmf_default_host->id);
+ strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE);
while ((p = strsep(&o, ",\n")) != NULL) {
if (!*p)
@@ -783,12 +824,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
- opts->host = nvmf_host_add(p);
+ strscpy(hostnqn, p, NVMF_NQN_SIZE);
kfree(p);
- if (!opts->host) {
- ret = -ENOMEM;
- goto out;
- }
break;
case NVMF_OPT_RECONNECT_DELAY:
if (match_int(args, &token)) {
@@ -945,18 +982,94 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->fast_io_fail_tmo, ctrl_loss_tmo);
}
- if (!opts->host) {
- kref_get(&nvmf_default_host->ref);
- opts->host = nvmf_default_host;
+ opts->host = nvmf_host_add(hostnqn, &hostid);
+ if (IS_ERR(opts->host)) {
+ ret = PTR_ERR(opts->host);
+ opts->host = NULL;
+ goto out;
}
- uuid_copy(&opts->host->id, &hostid);
-
out:
kfree(options);
return ret;
}
+void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
+ u32 io_queues[HCTX_MAX_TYPES])
+{
+ if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
+ /*
+ * separate read/write queues
+ * hand out dedicated default queues only after we have
+ * sufficient read queues.
+ */
+ io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
+ nr_io_queues -= io_queues[HCTX_TYPE_READ];
+ io_queues[HCTX_TYPE_DEFAULT] =
+ min(opts->nr_write_queues, nr_io_queues);
+ nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
+ } else {
+ /*
+ * shared read/write queues
+ * either no write queues were requested, or we don't have
+ * sufficient queue count to have dedicated default queues.
+ */
+ io_queues[HCTX_TYPE_DEFAULT] =
+ min(opts->nr_io_queues, nr_io_queues);
+ nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT];
+ }
+
+ if (opts->nr_poll_queues && nr_io_queues) {
+ /* map dedicated poll queues only if we have queues left */
+ io_queues[HCTX_TYPE_POLL] =
+ min(opts->nr_poll_queues, nr_io_queues);
+ }
+}
+EXPORT_SYMBOL_GPL(nvmf_set_io_queues);
+
+void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
+ u32 io_queues[HCTX_MAX_TYPES])
+{
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+
+ if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) {
+ /* separate read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ io_queues[HCTX_TYPE_DEFAULT];
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues =
+ io_queues[HCTX_TYPE_READ];
+ set->map[HCTX_TYPE_READ].queue_offset =
+ io_queues[HCTX_TYPE_DEFAULT];
+ } else {
+ /* shared read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues =
+ io_queues[HCTX_TYPE_DEFAULT];
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues =
+ io_queues[HCTX_TYPE_DEFAULT];
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ }
+
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+ if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) {
+ /* map dedicated poll queues only if we have queues left */
+ set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL];
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ io_queues[HCTX_TYPE_DEFAULT] +
+ io_queues[HCTX_TYPE_READ];
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ }
+
+ dev_info(ctrl->device,
+ "mapped %d/%d/%d default/read/poll queues.\n",
+ io_queues[HCTX_TYPE_DEFAULT],
+ io_queues[HCTX_TYPE_READ],
+ io_queues[HCTX_TYPE_POLL]);
+}
+EXPORT_SYMBOL_GPL(nvmf_map_queues);
+
static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
unsigned int required_opts)
{
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index dcac3df8a5f7..82e7a27ffbde 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -77,6 +77,9 @@ enum {
* with the parsing opts enum.
* @mask: Used by the fabrics library to parse through sysfs options
* on adding a NVMe controller.
+ * @max_reconnects: maximum number of allowed reconnect attempts before removing
+ * the controller, (-1) means reconnect forever, zero means remove
+ * immediately;
* @transport: Holds the fabric transport "technology name" (for a lack of
* better description) that will be used by an NVMe controller
* being added.
@@ -96,9 +99,6 @@ enum {
* @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
* @kato: Keep-alive timeout.
* @host: Virtual NVMe host, contains the NQN and Host ID.
- * @max_reconnects: maximum number of allowed reconnect attempts before removing
- * the controller, (-1) means reconnect forever, zero means remove
- * immediately;
* @dhchap_secret: DH-HMAC-CHAP secret
* @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional
* authentication
@@ -112,6 +112,7 @@ enum {
*/
struct nvmf_ctrl_options {
unsigned mask;
+ int max_reconnects;
char *transport;
char *subsysnqn;
char *traddr;
@@ -125,7 +126,6 @@ struct nvmf_ctrl_options {
bool duplicate_connect;
unsigned int kato;
struct nvmf_host *host;
- int max_reconnects;
char *dhchap_secret;
char *dhchap_ctrl_secret;
bool disable_sqflow;
@@ -181,7 +181,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
ctrl->state == NVME_CTRL_DEAD ||
strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
- memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
+ !uuid_equal(&opts->host->id, &ctrl->opts->host->id))
return false;
return true;
@@ -203,6 +203,13 @@ static inline void nvmf_complete_timed_out_request(struct request *rq)
}
}
+static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
+{
+ return min(opts->nr_io_queues, num_online_cpus()) +
+ min(opts->nr_write_queues, num_online_cpus()) +
+ min(opts->nr_poll_queues, num_online_cpus());
+}
+
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
@@ -215,5 +222,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
+void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues,
+ u32 io_queues[HCTX_MAX_TYPES]);
+void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl,
+ u32 io_queues[HCTX_MAX_TYPES]);
#endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 83d2e6860d38..1ba10a5c656d 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
/* create debugfs directory and attribute */
parent = debugfs_create_dir(dev_name, NULL);
- if (!parent) {
+ if (IS_ERR(parent)) {
pr_warn("%s: failed to create debugfs directory\n", dev_name);
return;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2ed75923507d..1cd2bf82319a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2548,14 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
* the controller. Abort any ios on the association and let the
* create_association error path resolve things.
*/
- if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
- __nvme_fc_abort_outstanding_ios(ctrl, true);
+ enum nvme_ctrl_state state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctrl->lock, flags);
+ state = ctrl->ctrl.state;
+ if (state == NVME_CTRL_CONNECTING) {
set_bit(ASSOC_FAILED, &ctrl->flags);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ __nvme_fc_abort_outstanding_ios(ctrl, true);
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: transport error during (re)connect\n",
+ ctrl->cnum);
return;
}
+ spin_unlock_irqrestore(&ctrl->lock, flags);
/* Otherwise, only proceed if in LIVE state - e.g. on first error */
- if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ if (state != NVME_CTRL_LIVE)
return;
dev_warn(ctrl->ctrl.device,
@@ -2917,8 +2927,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
&nvme_fc_mq_ops, 1,
- struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
- ctrl->lport->ops->fcprqst_priv_sz));
+ struct_size_t(struct nvme_fcp_op_w_sgl, priv,
+ ctrl->lport->ops->fcprqst_priv_sz));
if (ret)
return ret;
@@ -3110,7 +3120,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/
ret = nvme_enable_ctrl(&ctrl->ctrl);
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret)
goto out_disconnect_admin_queue;
ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
@@ -3120,7 +3132,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
nvme_unquiesce_admin_queue(&ctrl->ctrl);
ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret)
goto out_disconnect_admin_queue;
/* sanity checks */
@@ -3165,10 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
- goto out_term_aen_ops;
+ spin_lock_irqsave(&ctrl->lock, flags);
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret) {
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ goto out_term_aen_ops;
+ }
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
ctrl->ctrl.nr_reconnects = 0;
@@ -3180,6 +3200,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
out_term_aen_ops:
nvme_fc_term_aen_ops(ctrl);
out_disconnect_admin_queue:
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
+ ctrl->cnum, ctrl->association_id, ret);
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
spin_lock_irqsave(&ctrl->lock, flags);
@@ -3536,8 +3559,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
&nvme_fc_admin_mq_ops,
- struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
- ctrl->lport->ops->fcprqst_priv_sz));
+ struct_size_t(struct nvme_fcp_op_w_sgl, priv,
+ ctrl->lport->ops->fcprqst_priv_sz));
if (ret)
goto fail_ctrl;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 81c5c9e38477..5c3250f36ce7 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -14,7 +14,7 @@ enum {
};
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
- unsigned int flags, fmode_t mode)
+ unsigned int flags, bool open_for_write)
{
u32 effects;
@@ -80,7 +80,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
* writing.
*/
if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
- return mode & FMODE_WRITE;
+ return open_for_write;
return true;
}
@@ -254,7 +254,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
blk_mq_free_request(req);
if (effects)
- nvme_passthru_end(ctrl, effects, cmd, ret);
+ nvme_passthru_end(ctrl, ns, effects, cmd, ret);
return ret;
}
@@ -337,7 +337,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
- fmode_t mode)
+ bool open_for_write)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
@@ -365,7 +365,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, 0, mode))
+ if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
return -EACCES;
if (cmd.timeout_ms)
@@ -385,7 +385,7 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
- fmode_t mode)
+ bool open_for_write)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
@@ -412,7 +412,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, flags, mode))
+ if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
return -EACCES;
if (cmd.timeout_ms)
@@ -505,7 +505,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
{
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- void *cookie = READ_ONCE(ioucmd->cookie);
req->bio = pdu->bio;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
@@ -518,10 +517,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
- if (cookie != NULL && blk_rq_is_poll(req))
+ if (blk_rq_is_poll(req)) {
+ WRITE_ONCE(ioucmd->cookie, NULL);
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
- else
- io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
+ } else {
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+ }
return RQ_END_IO_FREE;
}
@@ -531,7 +532,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
{
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- void *cookie = READ_ONCE(ioucmd->cookie);
req->bio = pdu->bio;
pdu->req = req;
@@ -540,10 +540,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
- if (cookie != NULL && blk_rq_is_poll(req))
+ if (blk_rq_is_poll(req)) {
+ WRITE_ONCE(ioucmd->cookie, NULL);
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
- else
- io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);
+ } else {
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
+ }
return RQ_END_IO_NONE;
}
@@ -583,7 +585,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
- if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode))
+ if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
return -EACCES;
d.metadata = READ_ONCE(cmd->metadata);
@@ -599,7 +601,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (issue_flags & IO_URING_F_IOPOLL)
rq_flags |= REQ_POLLED;
-retry:
req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -613,17 +614,11 @@ retry:
return ret;
}
- if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
- if (unlikely(!req->bio)) {
- /* we can't poll this, so alloc regular req instead */
- blk_mq_free_request(req);
- rq_flags &= ~REQ_POLLED;
- goto retry;
- } else {
- WRITE_ONCE(ioucmd->cookie, req->bio);
- req->bio->bi_opf |= REQ_POLLED;
- }
+ if (blk_rq_is_poll(req)) {
+ ioucmd->flags |= IORING_URING_CMD_POLLED;
+ WRITE_ONCE(ioucmd->cookie, req);
}
+
/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
pdu->meta_len = d.metadata_len;
@@ -649,13 +644,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
}
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
- void __user *argp, fmode_t mode)
+ void __user *argp, bool open_for_write)
{
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, 0, mode);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, 0, mode);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
default:
return sed_ioctl(ctrl->opal_dev, cmd, argp);
}
@@ -680,14 +675,14 @@ struct nvme_user_io32 {
#endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
- void __user *argp, unsigned int flags, fmode_t mode)
+ void __user *argp, unsigned int flags, bool open_for_write)
{
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->head->ns_id;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode);
+ return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
/*
* struct nvme_user_io can have different padding on some 32-bit ABIs.
* Just accept the compat version as all fields that are used are the
@@ -702,16 +697,18 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
flags |= NVME_IOCTL_VEC;
fallthrough;
case NVME_IOCTL_IO64_CMD:
- return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode);
+ return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
+ open_for_write);
default:
return -ENOTTY;
}
}
-int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns = bdev->bd_disk->private_data;
+ bool open_for_write = mode & BLK_OPEN_WRITE;
void __user *argp = (void __user *)arg;
unsigned int flags = 0;
@@ -719,19 +716,20 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
flags |= NVME_IOCTL_PARTITION;
if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
- return nvme_ns_ioctl(ns, cmd, argp, flags, mode);
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
+ return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
}
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct nvme_ns *ns =
container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+ bool open_for_write = file->f_mode & FMODE_WRITE;
void __user *argp = (void __user *)arg;
if (is_ctrl_ioctl(cmd))
- return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode);
- return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
+ return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
+ return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
}
static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -782,25 +780,23 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
struct io_comp_batch *iob,
unsigned int poll_flags)
{
- struct bio *bio;
+ struct request *req;
int ret = 0;
- struct nvme_ns *ns;
- struct request_queue *q;
+
+ if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
+ return 0;
rcu_read_lock();
- bio = READ_ONCE(ioucmd->cookie);
- ns = container_of(file_inode(ioucmd->file)->i_cdev,
- struct nvme_ns, cdev);
- q = ns->queue;
- if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
- ret = bio_poll(bio, iob, poll_flags);
+ req = READ_ONCE(ioucmd->cookie);
+ if (req && blk_rq_is_poll(req))
+ ret = blk_rq_poll(req, iob, poll_flags);
rcu_read_unlock();
return ret;
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp, struct nvme_ns_head *head, int srcu_idx,
- fmode_t mode)
+ bool open_for_write)
__releases(&head->srcu)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -808,16 +804,17 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl);
srcu_read_unlock(&head->srcu, srcu_idx);
- ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
nvme_put_ctrl(ctrl);
return ret;
}
-int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
+int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ bool open_for_write = mode & BLK_OPEN_WRITE;
void __user *argp = (void __user *)arg;
struct nvme_ns *ns;
int srcu_idx, ret = -EWOULDBLOCK;
@@ -838,9 +835,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
*/
if (is_ctrl_ioctl(cmd))
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
- mode);
+ open_for_write);
- ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode);
+ ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -849,6 +846,7 @@ out_unlock:
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
+ bool open_for_write = file->f_mode & FMODE_WRITE;
struct cdev *cdev = file_inode(file)->i_cdev;
struct nvme_ns_head *head =
container_of(cdev, struct nvme_ns_head, cdev);
@@ -863,9 +861,9 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
if (is_ctrl_ioctl(cmd))
return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
- file->f_mode);
+ open_for_write);
- ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode);
+ ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
out_unlock:
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
@@ -885,31 +883,6 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
}
-
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
- struct io_comp_batch *iob,
- unsigned int poll_flags)
-{
- struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
- struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
- int srcu_idx = srcu_read_lock(&head->srcu);
- struct nvme_ns *ns = nvme_find_path(head);
- struct bio *bio;
- int ret = 0;
- struct request_queue *q;
-
- if (ns) {
- rcu_read_lock();
- bio = READ_ONCE(ioucmd->cookie);
- q = ns->queue;
- if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio
- && bio->bi_bdev)
- ret = bio_poll(bio, iob, poll_flags);
- rcu_read_unlock();
- }
- srcu_read_unlock(&head->srcu, srcu_idx);
- return ret;
-}
#endif /* CONFIG_NVME_MULTIPATH */
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
@@ -940,7 +913,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
}
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
- fmode_t mode)
+ bool open_for_write)
{
struct nvme_ns *ns;
int ret;
@@ -964,7 +937,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
kref_get(&ns->kref);
up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp, 0, mode);
+ ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
nvme_put_ns(ns);
return ret;
@@ -976,16 +949,17 @@ out_unlock:
long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
+ bool open_for_write = file->f_mode & FMODE_WRITE;
struct nvme_ctrl *ctrl = file->private_data;
void __user *argp = (void __user *)arg;
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode);
+ return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_ADMIN64_CMD:
- return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode);
+ return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
case NVME_IOCTL_IO_CMD:
- return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
+ return nvme_dev_user_cmd(ctrl, argp, open_for_write);
case NVME_IOCTL_RESET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2bc159a318ff..0a88d7bdc5e3 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -106,6 +106,14 @@ void nvme_failover_req(struct request *req)
bio->bi_opf &= ~REQ_POLLED;
bio->bi_cookie = BLK_QC_T_NONE;
}
+ /*
+ * The alternate request queue that we may end up submitting
+ * the bio to may be frozen temporarily, in this case REQ_NOWAIT
+ * will fail the I/O immediately with EAGAIN to the issuer.
+ * We are not in the issuer context which cannot block. Clear
+ * the flag to avoid spurious EAGAIN I/O failures.
+ */
+ bio->bi_opf &= ~REQ_NOWAIT;
}
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
@@ -402,14 +410,14 @@ static void nvme_ns_head_submit_bio(struct bio *bio)
srcu_read_unlock(&head->srcu, srcu_idx);
}
-static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
+static int nvme_ns_head_open(struct gendisk *disk, blk_mode_t mode)
{
- if (!nvme_tryget_ns_head(bdev->bd_disk->private_data))
+ if (!nvme_tryget_ns_head(disk->private_data))
return -ENXIO;
return 0;
}
-static void nvme_ns_head_release(struct gendisk *disk, fmode_t mode)
+static void nvme_ns_head_release(struct gendisk *disk)
{
nvme_put_ns_head(disk->private_data);
}
@@ -470,7 +478,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.unlocked_ioctl = nvme_ns_head_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_ns_head_chr_uring_cmd,
- .uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
+ .uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
};
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a2d4f59e0535..f35647c470af 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,8 @@
#include <trace/events/block.h>
+extern const struct pr_ops nvme_pr_ops;
+
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -247,12 +249,14 @@ enum nvme_ctrl_flags {
NVME_CTRL_ADMIN_Q_STOPPED = 1,
NVME_CTRL_STARTED_ONCE = 2,
NVME_CTRL_STOPPED = 3,
+ NVME_CTRL_SKIP_ID_CNS_CS = 4,
+ NVME_CTRL_DIRTY_CAPABILITY = 5,
};
struct nvme_ctrl {
bool comp_seen;
- enum nvme_ctrl_state state;
bool identified;
+ enum nvme_ctrl_state state;
spinlock_t lock;
struct mutex scan_lock;
const struct nvme_ctrl_ops *ops;
@@ -284,8 +288,8 @@ struct nvme_ctrl {
char name[12];
u16 cntlid;
- u32 ctrl_config;
u16 mtfa;
+ u32 ctrl_config;
u32 queue_count;
u64 cap;
@@ -328,6 +332,7 @@ struct nvme_ctrl {
struct delayed_work ka_work;
struct delayed_work failfast_work;
struct nvme_command ka_cmd;
+ unsigned long ka_last_check_time;
struct work_struct fw_act_work;
unsigned long events;
@@ -358,10 +363,10 @@ struct nvme_ctrl {
bool apst_enabled;
/* PCIe only: */
+ u16 hmmaxd;
u32 hmpre;
u32 hmmin;
u32 hmminds;
- u16 hmmaxd;
/* Fabrics only */
u32 ioccsz;
@@ -841,10 +846,10 @@ void nvme_put_ns_head(struct nvme_ns_head *head);
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
const struct file_operations *fops, struct module *owner);
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
-int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
+int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
@@ -852,8 +857,6 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
struct io_comp_batch *iob, unsigned int poll_flags);
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
- struct io_comp_batch *iob, unsigned int poll_flags);
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
@@ -865,7 +868,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
+extern const struct attribute_group *nvme_subsys_attrs_groups[];
+extern const struct attribute_group *nvme_dev_attr_groups[];
+extern const struct block_device_operations nvme_bdev_ops;
+void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#ifdef CONFIG_NVME_MULTIPATH
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@@ -1077,7 +1084,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
int nvme_execute_rq(struct request *rq, bool at_head);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 32244582fdb0..baf69af7ea78 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -420,10 +420,9 @@ static int nvme_pci_init_request(struct blk_mq_tag_set *set,
struct request *req, unsigned int hctx_idx,
unsigned int numa_node)
{
- struct nvme_dev *dev = to_nvme_dev(set->driver_data);
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- nvme_req(req)->ctrl = &dev->ctrl;
+ nvme_req(req)->ctrl = set->driver_data;
nvme_req(req)->cmd = &iod->cmd;
return 0;
}
@@ -968,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_data_dir(req));
+ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
@@ -1299,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
- nvme_dev_disable(dev, false);
- nvme_reset_ctrl(&dev->ctrl);
- return BLK_EH_DONE;
+ goto disable;
}
/*
@@ -1352,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- nvme_dev_disable(dev, false);
- nvme_reset_ctrl(&dev->ctrl);
-
- return BLK_EH_DONE;
+ goto disable;
}
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1392,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* as the device then is in a faulty state.
*/
return BLK_EH_RESET_TIMER;
+
+disable:
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+ return BLK_EH_DONE;
+
+ nvme_dev_disable(dev, false);
+ if (nvme_try_sched_reset(&dev->ctrl))
+ nvme_unquiesce_io_queues(&dev->ctrl);
+ return BLK_EH_DONE;
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -2691,7 +2694,8 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.state != NVME_CTRL_RESETTING) {
dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
dev->ctrl.state);
- return;
+ result = -ENODEV;
+ goto out;
}
/*
@@ -2778,7 +2782,9 @@ static void nvme_reset_work(struct work_struct *work)
result);
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_dev_disable(dev, true);
+ nvme_sync_queues(&dev->ctrl);
nvme_mark_namespaces_dead(&dev->ctrl);
+ nvme_unquiesce_io_queues(&dev->ctrl);
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
}
@@ -3276,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
+ nvme_dev_disable(dev, true);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
nvme_dev_disable(dev, false);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
@@ -3292,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset_ctrl(&dev->ctrl);
+ if (!nvme_try_sched_reset(&dev->ctrl))
+ nvme_unquiesce_io_queues(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -3394,6 +3405,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
@@ -3424,6 +3437,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e4B, 0x1602), /* MAXIO MAP1602 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
new file mode 100644
index 000000000000..391b1465ebfd
--- /dev/null
+++ b/drivers/nvme/host/pr.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015 Intel Corporation
+ * Keith Busch <kbusch@kernel.org>
+ */
+#include <linux/blkdev.h>
+#include <linux/pr.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+static enum nvme_pr_type nvme_pr_type_from_blk(enum pr_type type)
+{
+ switch (type) {
+ case PR_WRITE_EXCLUSIVE:
+ return NVME_PR_WRITE_EXCLUSIVE;
+ case PR_EXCLUSIVE_ACCESS:
+ return NVME_PR_EXCLUSIVE_ACCESS;
+ case PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return NVME_PR_WRITE_EXCLUSIVE_REG_ONLY;
+ case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY;
+ case PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return NVME_PR_WRITE_EXCLUSIVE_ALL_REGS;
+ case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS;
+ }
+
+ return 0;
+}
+
+static enum pr_type block_pr_type_from_nvme(enum nvme_pr_type type)
+{
+ switch (type) {
+ case NVME_PR_WRITE_EXCLUSIVE:
+ return PR_WRITE_EXCLUSIVE;
+ case NVME_PR_EXCLUSIVE_ACCESS:
+ return PR_EXCLUSIVE_ACCESS;
+ case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY:
+ return PR_WRITE_EXCLUSIVE_REG_ONLY;
+ case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY:
+ return PR_EXCLUSIVE_ACCESS_REG_ONLY;
+ case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS:
+ return PR_WRITE_EXCLUSIVE_ALL_REGS;
+ case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS:
+ return PR_EXCLUSIVE_ACCESS_ALL_REGS;
+ }
+
+ return 0;
+}
+
+static int nvme_send_ns_head_pr_command(struct block_device *bdev,
+ struct nvme_command *c, void *data, unsigned int data_len)
+{
+ struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ int srcu_idx = srcu_read_lock(&head->srcu);
+ struct nvme_ns *ns = nvme_find_path(head);
+ int ret = -EWOULDBLOCK;
+
+ if (ns) {
+ c->common.nsid = cpu_to_le32(ns->head->ns_id);
+ ret = nvme_submit_sync_cmd(ns->queue, c, data, data_len);
+ }
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+
+static int nvme_send_ns_pr_command(struct nvme_ns *ns, struct nvme_command *c,
+ void *data, unsigned int data_len)
+{
+ c->common.nsid = cpu_to_le32(ns->head->ns_id);
+ return nvme_submit_sync_cmd(ns->queue, c, data, data_len);
+}
+
+static int nvme_sc_to_pr_err(int nvme_sc)
+{
+ if (nvme_is_path_error(nvme_sc))
+ return PR_STS_PATH_FAILED;
+
+ switch (nvme_sc) {
+ case NVME_SC_SUCCESS:
+ return PR_STS_SUCCESS;
+ case NVME_SC_RESERVATION_CONFLICT:
+ return PR_STS_RESERVATION_CONFLICT;
+ case NVME_SC_ONCS_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case NVME_SC_BAD_ATTRIBUTES:
+ case NVME_SC_INVALID_OPCODE:
+ case NVME_SC_INVALID_FIELD:
+ case NVME_SC_INVALID_NS:
+ return -EINVAL;
+ default:
+ return PR_STS_IOERR;
+ }
+}
+
+static int nvme_send_pr_command(struct block_device *bdev,
+ struct nvme_command *c, void *data, unsigned int data_len)
+{
+ if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+ bdev->bd_disk->fops == &nvme_ns_head_ops)
+ return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
+
+ return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
+ data_len);
+}
+
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+ u64 key, u64 sa_key, u8 op)
+{
+ struct nvme_command c = { };
+ u8 data[16] = { 0, };
+ int ret;
+
+ put_unaligned_le64(key, &data[0]);
+ put_unaligned_le64(sa_key, &data[8]);
+
+ c.common.opcode = op;
+ c.common.cdw10 = cpu_to_le32(cdw10);
+
+ ret = nvme_send_pr_command(bdev, &c, data, sizeof(data));
+ if (ret < 0)
+ return ret;
+
+ return nvme_sc_to_pr_err(ret);
+}
+
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+ u64 new, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = old ? 2 : 0;
+ cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
+ cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+ enum pr_type type, unsigned flags)
+{
+ u32 cdw10;
+
+ if (flags & ~PR_FL_IGNORE_KEY)
+ return -EOPNOTSUPP;
+
+ cdw10 = nvme_pr_type_from_blk(type) << 8;
+ cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+ enum pr_type type, bool abort)
+{
+ u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1);
+
+ return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+ u32 cdw10 = 1 | (key ? 0 : 1 << 3);
+
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+ u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3);
+
+ return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+static int nvme_pr_resv_report(struct block_device *bdev, void *data,
+ u32 data_len, bool *eds)
+{
+ struct nvme_command c = { };
+ int ret;
+
+ c.common.opcode = nvme_cmd_resv_report;
+ c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len));
+ c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT);
+ *eds = true;
+
+retry:
+ ret = nvme_send_pr_command(bdev, &c, data, data_len);
+ if (ret == NVME_SC_HOST_ID_INCONSIST &&
+ c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) {
+ c.common.cdw11 = 0;
+ *eds = false;
+ goto retry;
+ }
+
+ if (ret < 0)
+ return ret;
+
+ return nvme_sc_to_pr_err(ret);
+}
+
+static int nvme_pr_read_keys(struct block_device *bdev,
+ struct pr_keys *keys_info)
+{
+ u32 rse_len, num_keys = keys_info->num_keys;
+ struct nvme_reservation_status_ext *rse;
+ int ret, i;
+ bool eds;
+
+ /*
+ * Assume we are using 128-bit host IDs and allocate a buffer large
+ * enough to get enough keys to fill the return keys buffer.
+ */
+ rse_len = struct_size(rse, regctl_eds, num_keys);
+ rse = kzalloc(rse_len, GFP_KERNEL);
+ if (!rse)
+ return -ENOMEM;
+
+ ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds);
+ if (ret)
+ goto free_rse;
+
+ keys_info->generation = le32_to_cpu(rse->gen);
+ keys_info->num_keys = get_unaligned_le16(&rse->regctl);
+
+ num_keys = min(num_keys, keys_info->num_keys);
+ for (i = 0; i < num_keys; i++) {
+ if (eds) {
+ keys_info->keys[i] =
+ le64_to_cpu(rse->regctl_eds[i].rkey);
+ } else {
+ struct nvme_reservation_status *rs;
+
+ rs = (struct nvme_reservation_status *)rse;
+ keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey);
+ }
+ }
+
+free_rse:
+ kfree(rse);
+ return ret;
+}
+
+static int nvme_pr_read_reservation(struct block_device *bdev,
+ struct pr_held_reservation *resv)
+{
+ struct nvme_reservation_status_ext tmp_rse, *rse;
+ int ret, i, num_regs;
+ u32 rse_len;
+ bool eds;
+
+get_num_regs:
+ /*
+ * Get the number of registrations so we know how big to allocate
+ * the response buffer.
+ */
+ ret = nvme_pr_resv_report(bdev, &tmp_rse, sizeof(tmp_rse), &eds);
+ if (ret)
+ return ret;
+
+ num_regs = get_unaligned_le16(&tmp_rse.regctl);
+ if (!num_regs) {
+ resv->generation = le32_to_cpu(tmp_rse.gen);
+ return 0;
+ }
+
+ rse_len = struct_size(rse, regctl_eds, num_regs);
+ rse = kzalloc(rse_len, GFP_KERNEL);
+ if (!rse)
+ return -ENOMEM;
+
+ ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds);
+ if (ret)
+ goto free_rse;
+
+ if (num_regs != get_unaligned_le16(&rse->regctl)) {
+ kfree(rse);
+ goto get_num_regs;
+ }
+
+ resv->generation = le32_to_cpu(rse->gen);
+ resv->type = block_pr_type_from_nvme(rse->rtype);
+
+ for (i = 0; i < num_regs; i++) {
+ if (eds) {
+ if (rse->regctl_eds[i].rcsts) {
+ resv->key = le64_to_cpu(rse->regctl_eds[i].rkey);
+ break;
+ }
+ } else {
+ struct nvme_reservation_status *rs;
+
+ rs = (struct nvme_reservation_status *)rse;
+ if (rs->regctl_ds[i].rcsts) {
+ resv->key = le64_to_cpu(rs->regctl_ds[i].rkey);
+ break;
+ }
+ }
+ }
+
+free_rse:
+ kfree(rse);
+ return ret;
+}
+
+const struct pr_ops nvme_pr_ops = {
+ .pr_register = nvme_pr_register,
+ .pr_reserve = nvme_pr_reserve,
+ .pr_release = nvme_pr_release,
+ .pr_preempt = nvme_pr_preempt,
+ .pr_clear = nvme_pr_clear,
+ .pr_read_keys = nvme_pr_read_keys,
+ .pr_read_reservation = nvme_pr_read_reservation,
+};
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0eb79696fb73..d433b2ec07a6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -501,7 +501,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
}
ibdev = queue->device->dev;
- /* +1 for ib_stop_cq */
+ /* +1 for ib_drain_qp */
queue->cq_size = cq_factor * queue->queue_size + 1;
ret = nvme_rdma_create_cq(ibdev, queue);
@@ -713,18 +713,10 @@ out_stop_queues:
static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
- struct ib_device *ibdev = ctrl->device->dev;
- unsigned int nr_io_queues, nr_default_queues;
- unsigned int nr_read_queues, nr_poll_queues;
+ unsigned int nr_io_queues;
int i, ret;
- nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
- min(opts->nr_io_queues, num_online_cpus()));
- nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors,
- min(opts->nr_write_queues, num_online_cpus()));
- nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
- nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
-
+ nr_io_queues = nvmf_nr_io_queues(opts);
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret)
return ret;
@@ -739,34 +731,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
dev_info(ctrl->ctrl.device,
"creating %d I/O queues.\n", nr_io_queues);
- if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
- /*
- * separate read/write queues
- * hand out dedicated default queues only after we have
- * sufficient read queues.
- */
- ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
- ctrl->io_queues[HCTX_TYPE_DEFAULT] =
- min(nr_default_queues, nr_io_queues);
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
- } else {
- /*
- * shared read/write queues
- * either no write queues were requested, or we don't have
- * sufficient queue count to have dedicated default queues.
- */
- ctrl->io_queues[HCTX_TYPE_DEFAULT] =
- min(nr_read_queues, nr_io_queues);
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
- }
-
- if (opts->nr_poll_queues && nr_io_queues) {
- /* map dedicated poll queues only if we have queues left */
- ctrl->io_queues[HCTX_TYPE_POLL] =
- min(nr_poll_queues, nr_io_queues);
- }
-
+ nvmf_set_io_queues(opts, nr_io_queues, ctrl->io_queues);
for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvme_rdma_alloc_queue(ctrl, i,
ctrl->ctrl.sqsize + 1);
@@ -2138,44 +2103,8 @@ static void nvme_rdma_complete_rq(struct request *rq)
static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data);
- struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
- if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
- /* separate read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
- set->map[HCTX_TYPE_READ].nr_queues =
- ctrl->io_queues[HCTX_TYPE_READ];
- set->map[HCTX_TYPE_READ].queue_offset =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- } else {
- /* shared read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
- set->map[HCTX_TYPE_READ].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_READ].queue_offset = 0;
- }
- blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
- blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
-
- if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
- /* map dedicated poll queues only if we have queues left */
- set->map[HCTX_TYPE_POLL].nr_queues =
- ctrl->io_queues[HCTX_TYPE_POLL];
- set->map[HCTX_TYPE_POLL].queue_offset =
- ctrl->io_queues[HCTX_TYPE_DEFAULT] +
- ctrl->io_queues[HCTX_TYPE_READ];
- blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
- }
-
- dev_info(ctrl->ctrl.device,
- "mapped %d/%d/%d default/read/poll queues.\n",
- ctrl->io_queues[HCTX_TYPE_DEFAULT],
- ctrl->io_queues[HCTX_TYPE_READ],
- ctrl->io_queues[HCTX_TYPE_POLL]);
+ nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
}
static const struct blk_mq_ops nvme_rdma_mq_ops = {
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
new file mode 100644
index 000000000000..212e1b05d298
--- /dev/null
+++ b/drivers/nvme/host/sysfs.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sysfs interface for the NVMe core driver.
+ *
+ * Copyright (c) 2011-2014, Intel Corporation.
+ */
+
+#include <linux/nvme-auth.h>
+
+#include "nvme.h"
+#include "fabrics.h"
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ int ret;
+
+ ret = nvme_reset_ctrl_sync(ctrl);
+ if (ret < 0)
+ return ret;
+ return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+static ssize_t nvme_sysfs_rescan(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ nvme_queue_scan(ctrl);
+ return count;
+}
+static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
+
+static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (disk->fops == &nvme_bdev_ops)
+ return nvme_get_ns_from_dev(dev)->head;
+ else
+ return disk->private_data;
+}
+
+static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns_head *head = dev_to_ns_head(dev);
+ struct nvme_ns_ids *ids = &head->ids;
+ struct nvme_subsystem *subsys = head->subsys;
+ int serial_len = sizeof(subsys->serial);
+ int model_len = sizeof(subsys->model);
+
+ if (!uuid_is_null(&ids->uuid))
+ return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid);
+
+ if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+ return sysfs_emit(buf, "eui.%16phN\n", ids->nguid);
+
+ if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+ return sysfs_emit(buf, "eui.%8phN\n", ids->eui64);
+
+ while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' ||
+ subsys->serial[serial_len - 1] == '\0'))
+ serial_len--;
+ while (model_len > 0 && (subsys->model[model_len - 1] == ' ' ||
+ subsys->model[model_len - 1] == '\0'))
+ model_len--;
+
+ return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id,
+ serial_len, subsys->serial, model_len, subsys->model,
+ head->ns_id);
+}
+static DEVICE_ATTR_RO(wwid);
+
+static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid);
+}
+static DEVICE_ATTR_RO(nguid);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
+
+ /* For backward compatibility expose the NGUID to userspace if
+ * we have no UUID set
+ */
+ if (uuid_is_null(&ids->uuid)) {
+ dev_warn_once(dev,
+ "No UUID available providing old NGUID\n");
+ return sysfs_emit(buf, "%pU\n", ids->nguid);
+ }
+ return sysfs_emit(buf, "%pU\n", &ids->uuid);
+}
+static DEVICE_ATTR_RO(uuid);
+
+static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64);
+}
+static DEVICE_ATTR_RO(eui);
+
+static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id);
+}
+static DEVICE_ATTR_RO(nsid);
+
+static struct attribute *nvme_ns_id_attrs[] = {
+ &dev_attr_wwid.attr,
+ &dev_attr_uuid.attr,
+ &dev_attr_nguid.attr,
+ &dev_attr_eui.attr,
+ &dev_attr_nsid.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+ &dev_attr_ana_grpid.attr,
+ &dev_attr_ana_state.attr,
+#endif
+ NULL,
+};
+
+static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvme_ns_ids *ids = &dev_to_ns_head(dev)->ids;
+
+ if (a == &dev_attr_uuid.attr) {
+ if (uuid_is_null(&ids->uuid) &&
+ !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_nguid.attr) {
+ if (!memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
+ return 0;
+ }
+ if (a == &dev_attr_eui.attr) {
+ if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
+ return 0;
+ }
+#ifdef CONFIG_NVME_MULTIPATH
+ if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
+ if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
+ return 0;
+ if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+ return 0;
+ }
+#endif
+ return a->mode;
+}
+
+static const struct attribute_group nvme_ns_id_attr_group = {
+ .attrs = nvme_ns_id_attrs,
+ .is_visible = nvme_ns_id_attrs_are_visible,
+};
+
+const struct attribute_group *nvme_ns_id_attr_groups[] = {
+ &nvme_ns_id_attr_group,
+ NULL,
+};
+
+#define nvme_show_str_function(field) \
+static ssize_t field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
+ return sysfs_emit(buf, "%.*s\n", \
+ (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \
+} \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_str_function(model);
+nvme_show_str_function(serial);
+nvme_show_str_function(firmware_rev);
+
+#define nvme_show_int_function(field) \
+static ssize_t field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
+ return sysfs_emit(buf, "%d\n", ctrl->field); \
+} \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+nvme_show_int_function(cntlid);
+nvme_show_int_function(numa_node);
+nvme_show_int_function(queue_count);
+nvme_show_int_function(sqsize);
+nvme_show_int_function(kato);
+
+static ssize_t nvme_sysfs_delete(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags))
+ return -EBUSY;
+
+ if (device_remove_file_self(dev, attr))
+ nvme_delete_ctrl_sync(ctrl);
+ return count;
+}
+static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
+
+static ssize_t nvme_sysfs_show_transport(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%s\n", ctrl->ops->name);
+}
+static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
+
+static ssize_t nvme_sysfs_show_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ static const char *const state_name[] = {
+ [NVME_CTRL_NEW] = "new",
+ [NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_RESETTING] = "resetting",
+ [NVME_CTRL_CONNECTING] = "connecting",
+ [NVME_CTRL_DELETING] = "deleting",
+ [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)",
+ [NVME_CTRL_DEAD] = "dead",
+ };
+
+ if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
+ state_name[ctrl->state])
+ return sysfs_emit(buf, "%s\n", state_name[ctrl->state]);
+
+ return sysfs_emit(buf, "unknown state\n");
+}
+
+static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
+
+static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%s\n", ctrl->subsys->subnqn);
+}
+static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
+
+static ssize_t nvme_sysfs_show_hostnqn(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%s\n", ctrl->opts->host->nqn);
+}
+static DEVICE_ATTR(hostnqn, S_IRUGO, nvme_sysfs_show_hostnqn, NULL);
+
+static ssize_t nvme_sysfs_show_hostid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%pU\n", &ctrl->opts->host->id);
+}
+static DEVICE_ATTR(hostid, S_IRUGO, nvme_sysfs_show_hostid, NULL);
+
+static ssize_t nvme_sysfs_show_address(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return ctrl->ops->get_address(ctrl, buf, PAGE_SIZE);
+}
+static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
+
+static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+
+ if (ctrl->opts->max_reconnects == -1)
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n",
+ opts->max_reconnects * opts->reconnect_delay);
+}
+
+static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int ctrl_loss_tmo, err;
+
+ err = kstrtoint(buf, 10, &ctrl_loss_tmo);
+ if (err)
+ return -EINVAL;
+
+ if (ctrl_loss_tmo < 0)
+ opts->max_reconnects = -1;
+ else
+ opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+ opts->reconnect_delay);
+ return count;
+}
+static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR,
+ nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store);
+
+static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->opts->reconnect_delay == -1)
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay);
+}
+
+static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ unsigned int v;
+ int err;
+
+ err = kstrtou32(buf, 10, &v);
+ if (err)
+ return err;
+
+ ctrl->opts->reconnect_delay = v;
+ return count;
+}
+static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
+ nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
+
+static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->opts->fast_io_fail_tmo == -1)
+ return sysfs_emit(buf, "off\n");
+ return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo);
+}
+
+static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ int fast_io_fail_tmo, err;
+
+ err = kstrtoint(buf, 10, &fast_io_fail_tmo);
+ if (err)
+ return -EINVAL;
+
+ if (fast_io_fail_tmo < 0)
+ opts->fast_io_fail_tmo = -1;
+ else
+ opts->fast_io_fail_tmo = fast_io_fail_tmo;
+ return count;
+}
+static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR,
+ nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store);
+
+static ssize_t cntrltype_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const char * const type[] = {
+ [NVME_CTRL_IO] = "io\n",
+ [NVME_CTRL_DISC] = "discovery\n",
+ [NVME_CTRL_ADMIN] = "admin\n",
+ };
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->cntrltype > NVME_CTRL_ADMIN || !type[ctrl->cntrltype])
+ return sysfs_emit(buf, "reserved\n");
+
+ return sysfs_emit(buf, type[ctrl->cntrltype]);
+}
+static DEVICE_ATTR_RO(cntrltype);
+
+static ssize_t dctype_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const char * const type[] = {
+ [NVME_DCTYPE_NOT_REPORTED] = "none\n",
+ [NVME_DCTYPE_DDC] = "ddc\n",
+ [NVME_DCTYPE_CDC] = "cdc\n",
+ };
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (ctrl->dctype > NVME_DCTYPE_CDC || !type[ctrl->dctype])
+ return sysfs_emit(buf, "reserved\n");
+
+ return sysfs_emit(buf, type[ctrl->dctype]);
+}
+static DEVICE_ATTR_RO(dctype);
+
+#ifdef CONFIG_NVME_AUTH
+static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+
+ if (!opts->dhchap_secret)
+ return sysfs_emit(buf, "none\n");
+ return sysfs_emit(buf, "%s\n", opts->dhchap_secret);
+}
+
+static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ char *dhchap_secret;
+
+ if (!ctrl->opts->dhchap_secret)
+ return -EINVAL;
+ if (count < 7)
+ return -EINVAL;
+ if (memcmp(buf, "DHHC-1:", 7))
+ return -EINVAL;
+
+ dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
+ if (!dhchap_secret)
+ return -ENOMEM;
+ memcpy(dhchap_secret, buf, count);
+ nvme_auth_stop(ctrl);
+ if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+ struct nvme_dhchap_key *key, *host_key;
+ int ret;
+
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
+ if (ret) {
+ kfree(dhchap_secret);
+ return ret;
+ }
+ kfree(opts->dhchap_secret);
+ opts->dhchap_secret = dhchap_secret;
+ host_key = ctrl->host_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->host_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(host_key);
+ } else
+ kfree(dhchap_secret);
+ /* Start re-authentication */
+ dev_info(ctrl->device, "re-authenticating controller\n");
+ queue_work(nvme_wq, &ctrl->dhchap_auth_work);
+
+ return count;
+}
+
+static DEVICE_ATTR(dhchap_secret, S_IRUGO | S_IWUSR,
+ nvme_ctrl_dhchap_secret_show, nvme_ctrl_dhchap_secret_store);
+
+static ssize_t nvme_ctrl_dhchap_ctrl_secret_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+
+ if (!opts->dhchap_ctrl_secret)
+ return sysfs_emit(buf, "none\n");
+ return sysfs_emit(buf, "%s\n", opts->dhchap_ctrl_secret);
+}
+
+static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvmf_ctrl_options *opts = ctrl->opts;
+ char *dhchap_secret;
+
+ if (!ctrl->opts->dhchap_ctrl_secret)
+ return -EINVAL;
+ if (count < 7)
+ return -EINVAL;
+ if (memcmp(buf, "DHHC-1:", 7))
+ return -EINVAL;
+
+ dhchap_secret = kzalloc(count + 1, GFP_KERNEL);
+ if (!dhchap_secret)
+ return -ENOMEM;
+ memcpy(dhchap_secret, buf, count);
+ nvme_auth_stop(ctrl);
+ if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+ struct nvme_dhchap_key *key, *ctrl_key;
+ int ret;
+
+ ret = nvme_auth_generate_key(dhchap_secret, &key);
+ if (ret) {
+ kfree(dhchap_secret);
+ return ret;
+ }
+ kfree(opts->dhchap_ctrl_secret);
+ opts->dhchap_ctrl_secret = dhchap_secret;
+ ctrl_key = ctrl->ctrl_key;
+ mutex_lock(&ctrl->dhchap_auth_mutex);
+ ctrl->ctrl_key = key;
+ mutex_unlock(&ctrl->dhchap_auth_mutex);
+ nvme_auth_free_key(ctrl_key);
+ } else
+ kfree(dhchap_secret);
+ /* Start re-authentication */
+ dev_info(ctrl->device, "re-authenticating controller\n");
+ queue_work(nvme_wq, &ctrl->dhchap_auth_work);
+
+ return count;
+}
+
+static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
+ nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
+#endif
+
+static struct attribute *nvme_dev_attrs[] = {
+ &dev_attr_reset_controller.attr,
+ &dev_attr_rescan_controller.attr,
+ &dev_attr_model.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_firmware_rev.attr,
+ &dev_attr_cntlid.attr,
+ &dev_attr_delete_controller.attr,
+ &dev_attr_transport.attr,
+ &dev_attr_subsysnqn.attr,
+ &dev_attr_address.attr,
+ &dev_attr_state.attr,
+ &dev_attr_numa_node.attr,
+ &dev_attr_queue_count.attr,
+ &dev_attr_sqsize.attr,
+ &dev_attr_hostnqn.attr,
+ &dev_attr_hostid.attr,
+ &dev_attr_ctrl_loss_tmo.attr,
+ &dev_attr_reconnect_delay.attr,
+ &dev_attr_fast_io_fail_tmo.attr,
+ &dev_attr_kato.attr,
+ &dev_attr_cntrltype.attr,
+ &dev_attr_dctype.attr,
+#ifdef CONFIG_NVME_AUTH
+ &dev_attr_dhchap_secret.attr,
+ &dev_attr_dhchap_ctrl_secret.attr,
+#endif
+ NULL
+};
+
+static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
+ return 0;
+ if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
+ return 0;
+ if (a == &dev_attr_hostnqn.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_hostid.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_fast_io_fail_tmo.attr && !ctrl->opts)
+ return 0;
+#ifdef CONFIG_NVME_AUTH
+ if (a == &dev_attr_dhchap_secret.attr && !ctrl->opts)
+ return 0;
+ if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
+ return 0;
+#endif
+
+ return a->mode;
+}
+
+const struct attribute_group nvme_dev_attrs_group = {
+ .attrs = nvme_dev_attrs,
+ .is_visible = nvme_dev_attrs_are_visible,
+};
+EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
+
+const struct attribute_group *nvme_dev_attr_groups[] = {
+ &nvme_dev_attrs_group,
+ NULL,
+};
+
+#define SUBSYS_ATTR_RO(_name, _mode, _show) \
+ struct device_attribute subsys_attr_##_name = \
+ __ATTR(_name, _mode, _show, NULL)
+
+static ssize_t nvme_subsys_show_nqn(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_subsystem *subsys =
+ container_of(dev, struct nvme_subsystem, dev);
+
+ return sysfs_emit(buf, "%s\n", subsys->subnqn);
+}
+static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn);
+
+static ssize_t nvme_subsys_show_type(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_subsystem *subsys =
+ container_of(dev, struct nvme_subsystem, dev);
+
+ switch (subsys->subtype) {
+ case NVME_NQN_DISC:
+ return sysfs_emit(buf, "discovery\n");
+ case NVME_NQN_NVME:
+ return sysfs_emit(buf, "nvm\n");
+ default:
+ return sysfs_emit(buf, "reserved\n");
+ }
+}
+static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type);
+
+#define nvme_subsys_show_str_function(field) \
+static ssize_t subsys_##field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct nvme_subsystem *subsys = \
+ container_of(dev, struct nvme_subsystem, dev); \
+ return sysfs_emit(buf, "%.*s\n", \
+ (int)sizeof(subsys->field), subsys->field); \
+} \
+static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show);
+
+nvme_subsys_show_str_function(model);
+nvme_subsys_show_str_function(serial);
+nvme_subsys_show_str_function(firmware_rev);
+
+static struct attribute *nvme_subsys_attrs[] = {
+ &subsys_attr_model.attr,
+ &subsys_attr_serial.attr,
+ &subsys_attr_firmware_rev.attr,
+ &subsys_attr_subsysnqn.attr,
+ &subsys_attr_subsystype.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+ &subsys_attr_iopolicy.attr,
+#endif
+ NULL,
+};
+
+static const struct attribute_group nvme_subsys_attrs_group = {
+ .attrs = nvme_subsys_attrs,
+};
+
+const struct attribute_group *nvme_subsys_attrs_groups[] = {
+ &nvme_subsys_attrs_group,
+ NULL,
+};
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index bf0230442d57..9ce417cd32a7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -997,25 +997,28 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
u32 h2cdata_left = req->h2cdata_left;
while (true) {
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
+ };
struct page *page = nvme_tcp_req_cur_page(req);
size_t offset = nvme_tcp_req_cur_offset(req);
size_t len = nvme_tcp_req_cur_length(req);
bool last = nvme_tcp_pdu_last_send(req, len);
int req_data_sent = req->data_sent;
- int ret, flags = MSG_DONTWAIT;
+ int ret;
if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
else
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
- if (sendpage_ok(page)) {
- ret = kernel_sendpage(queue->sock, page, offset, len,
- flags);
- } else {
- ret = sock_no_sendpage(queue->sock, page, offset, len,
- flags);
- }
+ if (!sendpage_ok(page))
+ msg.msg_flags &= ~MSG_SPLICE_PAGES;
+
+ bvec_set_page(&bvec, page, len, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (ret <= 0)
return ret;
@@ -1054,22 +1057,24 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
bool inline_data = nvme_tcp_has_inline_data(req);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
- int flags = MSG_DONTWAIT;
int ret;
if (inline_data || nvme_tcp_queue_more(queue))
- flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ msg.msg_flags |= MSG_MORE;
else
- flags |= MSG_EOR;
+ msg.msg_flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len, flags);
+ bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
return ret;
@@ -1093,6 +1098,8 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, };
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) - req->offset + hdgst;
int ret;
@@ -1101,13 +1108,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
if (!req->h2cdata_left)
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
- else
- ret = sock_no_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE);
+ msg.msg_flags |= MSG_SPLICE_PAGES;
+
+ bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+ ret = sock_sendmsg(queue->sock, &msg);
if (unlikely(ret <= 0))
return ret;
@@ -1802,58 +1807,12 @@ out_free_queues:
return ret;
}
-static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
-{
- unsigned int nr_io_queues;
-
- nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
- nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
- nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
-
- return nr_io_queues;
-}
-
-static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
- unsigned int nr_io_queues)
-{
- struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
- struct nvmf_ctrl_options *opts = nctrl->opts;
-
- if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
- /*
- * separate read/write queues
- * hand out dedicated default queues only after we have
- * sufficient read queues.
- */
- ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
- ctrl->io_queues[HCTX_TYPE_DEFAULT] =
- min(opts->nr_write_queues, nr_io_queues);
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
- } else {
- /*
- * shared read/write queues
- * either no write queues were requested, or we don't have
- * sufficient queue count to have dedicated default queues.
- */
- ctrl->io_queues[HCTX_TYPE_DEFAULT] =
- min(opts->nr_io_queues, nr_io_queues);
- nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
- }
-
- if (opts->nr_poll_queues && nr_io_queues) {
- /* map dedicated poll queues only if we have queues left */
- ctrl->io_queues[HCTX_TYPE_POLL] =
- min(opts->nr_poll_queues, nr_io_queues);
- }
-}
-
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
unsigned int nr_io_queues;
int ret;
- nr_io_queues = nvme_tcp_nr_io_queues(ctrl);
+ nr_io_queues = nvmf_nr_io_queues(ctrl->opts);
ret = nvme_set_queue_count(ctrl, &nr_io_queues);
if (ret)
return ret;
@@ -1868,8 +1827,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
dev_info(ctrl->device,
"creating %d I/O queues.\n", nr_io_queues);
- nvme_tcp_set_io_queues(ctrl, nr_io_queues);
-
+ nvmf_set_io_queues(ctrl->opts, nr_io_queues,
+ to_tcp_ctrl(ctrl)->io_queues);
return __nvme_tcp_alloc_io_queues(ctrl);
}
@@ -2449,44 +2408,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
static void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
- struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
-
- if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
- /* separate read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
- set->map[HCTX_TYPE_READ].nr_queues =
- ctrl->io_queues[HCTX_TYPE_READ];
- set->map[HCTX_TYPE_READ].queue_offset =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- } else {
- /* shared read/write queues */
- set->map[HCTX_TYPE_DEFAULT].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
- set->map[HCTX_TYPE_READ].nr_queues =
- ctrl->io_queues[HCTX_TYPE_DEFAULT];
- set->map[HCTX_TYPE_READ].queue_offset = 0;
- }
- blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
- blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
-
- if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
- /* map dedicated poll queues only if we have queues left */
- set->map[HCTX_TYPE_POLL].nr_queues =
- ctrl->io_queues[HCTX_TYPE_POLL];
- set->map[HCTX_TYPE_POLL].queue_offset =
- ctrl->io_queues[HCTX_TYPE_DEFAULT] +
- ctrl->io_queues[HCTX_TYPE_READ];
- blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
- }
-
- dev_info(ctrl->ctrl.device,
- "mapped %d/%d/%d default/read/poll queues.\n",
- ctrl->io_queues[HCTX_TYPE_DEFAULT],
- ctrl->io_queues[HCTX_TYPE_READ],
- ctrl->io_queues[HCTX_TYPE_POLL]);
+
+ nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
}
static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 12316ab51bda..ec8557810c21 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -10,12 +10,11 @@
int nvme_revalidate_zones(struct nvme_ns *ns)
{
struct request_queue *q = ns->queue;
- int ret;
- ret = blk_revalidate_disk_zones(ns->disk, NULL);
- if (!ret)
- blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
- return ret;
+ blk_queue_chunk_sectors(q, ns->zsze);
+ blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
+
+ return blk_revalidate_disk_zones(ns->disk, NULL);
}
static int nvme_set_max_append(struct nvme_ctrl *ctrl)