Diffstat (limited to 'drivers/nvme')
-rw-r--r--  drivers/nvme/host/Kconfig          |   2
-rw-r--r--  drivers/nvme/host/core.c           | 196
-rw-r--r--  drivers/nvme/host/fabrics.c        | 106
-rw-r--r--  drivers/nvme/host/fabrics.h        |   4
-rw-r--r--  drivers/nvme/host/fc.c             |  15
-rw-r--r--  drivers/nvme/host/multipath.c      |  24
-rw-r--r--  drivers/nvme/host/nvme.h           |  26
-rw-r--r--  drivers/nvme/host/pci.c            | 270
-rw-r--r--  drivers/nvme/host/rdma.c           |  12
-rw-r--r--  drivers/nvme/host/trace.h          |   4
-rw-r--r--  drivers/nvme/target/Kconfig        |   2
-rw-r--r--  drivers/nvme/target/Makefile       |   4
-rw-r--r--  drivers/nvme/target/admin-cmd.c    | 143
-rw-r--r--  drivers/nvme/target/core.c         | 128
-rw-r--r--  drivers/nvme/target/discovery.c    |   2
-rw-r--r--  drivers/nvme/target/fabrics-cmd.c  |   4
-rw-r--r--  drivers/nvme/target/fc.c           |   2
-rw-r--r--  drivers/nvme/target/io-cmd-bdev.c (renamed from drivers/nvme/target/io-cmd.c) |  77
-rw-r--r--  drivers/nvme/target/io-cmd-file.c  | 304
-rw-r--r--  drivers/nvme/target/loop.c         |  58
-rw-r--r--  drivers/nvme/target/nvmet.h        |  51
21 files changed, 999 insertions, 435 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index b979cf3bce65..88a8b5916624 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -27,7 +27,7 @@ config NVME_FABRICS
config NVME_RDMA
tristate "NVM Express over Fabrics RDMA host driver"
- depends on INFINIBAND && BLOCK
+ depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
select NVME_CORE
select NVME_FABRICS
select SG_POOL
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9df4f71e58ca..c8b30067b6ae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -99,6 +99,16 @@ static struct class *nvme_subsys_class;
static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);
+static void nvme_put_subsystem(struct nvme_subsystem *subsys);
+
+static void nvme_queue_scan(struct nvme_ctrl *ctrl)
+{
+ /*
+ * Only new queue scan work when admin and IO queues are both alive
+ */
+ if (ctrl->state == NVME_CTRL_LIVE)
+ queue_work(nvme_wq, &ctrl->scan_work);
+}
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
@@ -117,7 +127,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (ctrl->state != NVME_CTRL_LIVE &&
+ ctrl->state != NVME_CTRL_ADMIN_ONLY)
ret = -ENETRESET;
}
@@ -242,9 +253,6 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
void nvme_cancel_request(struct request *req, void *data, bool reserved)
{
- if (!blk_mq_request_started(req))
- return;
-
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
@@ -349,7 +357,8 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct(&head->srcu);
+ cleanup_srcu_struct_quiesced(&head->srcu);
+ nvme_put_subsystem(head->subsys);
kfree(head);
}
@@ -764,6 +773,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
ret = PTR_ERR(meta);
goto out_unmap;
}
+ req->cmd_flags |= REQ_INTEGRITY;
}
}
@@ -1029,6 +1039,21 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
}
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
+#define NVME_AEN_SUPPORTED \
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+
+static void nvme_enable_aen(struct nvme_ctrl *ctrl)
+{
+ u32 result;
+ int status;
+
+ status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
+ ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
+ if (status)
+ dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
+ ctrl->oaes & NVME_AEN_SUPPORTED);
+}
+
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_user_io io;
@@ -1347,13 +1372,19 @@ static void nvme_set_chunk_size(struct nvme_ns *ns)
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
-static void nvme_config_discard(struct nvme_ctrl *ctrl,
- unsigned stream_alignment, struct request_queue *queue)
+static void nvme_config_discard(struct nvme_ns *ns)
{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct request_queue *queue = ns->queue;
u32 size = queue_logical_block_size(queue);
- if (stream_alignment)
- size *= stream_alignment;
+ if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
+ blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+ return;
+ }
+
+ if (ctrl->nr_streams && ns->sws && ns->sgs)
+ size *= ns->sws * ns->sgs;
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
@@ -1361,9 +1392,12 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
queue->limits.discard_alignment = 0;
queue->limits.discard_granularity = size;
+ /* If discard is already enabled, don't reset queue limits */
+ if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+ return;
+
blk_queue_max_discard_sectors(queue, UINT_MAX);
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1407,10 +1441,6 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
- unsigned stream_alignment = 0;
-
- if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
- stream_alignment = ns->sws * ns->sgs;
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
@@ -1424,10 +1454,9 @@ static void nvme_update_disk_info(struct gendisk *disk,
nvme_init_integrity(disk, ns->ms, ns->pi_type);
if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
capacity = 0;
- set_capacity(disk, capacity);
- if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
- nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
+ set_capacity(disk, capacity);
+ nvme_config_discard(ns);
blk_mq_unfreeze_queue(disk->queue);
}
@@ -1443,8 +1472,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
ns->noiob = le16_to_cpu(id->noiob);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@ -1573,7 +1602,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key,
static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
enum pr_type type, bool abort)
{
- u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
+ u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}
@@ -1585,7 +1614,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key)
static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
- u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
+ u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}
@@ -2179,7 +2208,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
- if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
+ if (!ctrl->opts->discovery_nqn &&
+ nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
@@ -2310,7 +2340,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
ret = nvme_get_effects_log(ctrl);
if (ret < 0)
- return ret;
+ goto out_free;
}
if (!ctrl->identified) {
@@ -2341,6 +2371,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->oncs = le16_to_cpup(&id->oncs);
+ ctrl->oaes = le32_to_cpu(id->oaes);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
ctrl->cntlid = le16_to_cpup(&id->cntlid);
@@ -2860,6 +2891,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
goto out_cleanup_srcu;
list_add_tail(&head->entry, &ctrl->subsys->nsheads);
+
+ kref_get(&ctrl->subsys->ref);
+
return head;
out_cleanup_srcu:
cleanup_srcu_struct(&head->srcu);
@@ -2997,31 +3031,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (nvme_init_ns_head(ns, nsid, id))
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
-
-#ifdef CONFIG_NVME_MULTIPATH
- /*
- * If multipathing is enabled we need to always use the subsystem
- * instance number for numbering our devices to avoid conflicts
- * between subsystems that have multiple controllers and thus use
- * the multipath-aware subsystem node and those that have a single
- * controller and use the controller node directly.
- */
- if (ns->head->disk) {
- sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
- ctrl->cntlid, ns->head->instance);
- flags = GENHD_FL_HIDDEN;
- } else {
- sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
- ns->head->instance);
- }
-#else
- /*
- * But without the multipath code enabled, multiple controller per
- * subsystems are visible as devices and thus we cannot use the
- * subsystem instance.
- */
- sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
-#endif
+ nvme_set_disk_name(disk_name, ns, ctrl, &flags);
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk_name, node)) {
@@ -3187,6 +3197,42 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
nvme_remove_invalid_namespaces(ctrl, nn);
}
+static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+{
+ size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
+ __le32 *log;
+ int error, i;
+ bool ret = false;
+
+ log = kzalloc(log_size, GFP_KERNEL);
+ if (!log)
+ return false;
+
+ error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+ if (error) {
+ dev_warn(ctrl->device,
+ "reading changed ns log failed: %d\n", error);
+ goto out_free_log;
+ }
+
+ if (log[0] == cpu_to_le32(0xffffffff))
+ goto out_free_log;
+
+ for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
+ u32 nsid = le32_to_cpu(log[i]);
+
+ if (nsid == 0)
+ break;
+ dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
+ nvme_validate_ns(ctrl, nsid);
+ }
+ ret = true;
+
+out_free_log:
+ kfree(log);
+ return ret;
+}
+
static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
@@ -3199,6 +3245,12 @@ static void nvme_scan_work(struct work_struct *work)
WARN_ON_ONCE(!ctrl->tagset);
+ if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) {
+ if (nvme_scan_changed_ns_log(ctrl))
+ goto out_sort_namespaces;
+ dev_info(ctrl->device, "rescanning namespaces.\n");
+ }
+
if (nvme_identify_ctrl(ctrl, &id))
return;
@@ -3206,25 +3258,16 @@ static void nvme_scan_work(struct work_struct *work)
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
if (!nvme_scan_ns_list(ctrl, nn))
- goto done;
+ goto out_free_id;
}
nvme_scan_ns_sequential(ctrl, nn);
- done:
+out_free_id:
+ kfree(id);
+out_sort_namespaces:
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
- kfree(id);
-}
-
-void nvme_queue_scan(struct nvme_ctrl *ctrl)
-{
- /*
- * Only new queue scan work when admin and IO queues are both alive
- */
- if (ctrl->state == NVME_CTRL_LIVE)
- queue_work(nvme_wq, &ctrl->scan_work);
}
-EXPORT_SYMBOL_GPL(nvme_queue_scan);
/*
* This function iterates the namespace list unlocked to allow recovery from
@@ -3339,8 +3382,23 @@ static void nvme_fw_act_work(struct work_struct *work)
nvme_get_fw_slot_info(ctrl);
}
+static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
+{
+ switch ((result & 0xff00) >> 8) {
+ case NVME_AER_NOTICE_NS_CHANGED:
+ set_bit(EVENT_NS_CHANGED, &ctrl->events);
+ nvme_queue_scan(ctrl);
+ break;
+ case NVME_AER_NOTICE_FW_ACT_STARTING:
+ queue_work(nvme_wq, &ctrl->fw_act_work);
+ break;
+ default:
+ dev_warn(ctrl->device, "async event result %08x\n", result);
+ }
+}
+
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
- union nvme_result *res)
+ volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
@@ -3348,6 +3406,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
return;
switch (result & 0x7) {
+ case NVME_AER_NOTICE:
+ nvme_handle_aen_notice(ctrl, result);
+ break;
case NVME_AER_ERROR:
case NVME_AER_SMART:
case NVME_AER_CSS:
@@ -3357,18 +3418,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
default:
break;
}
-
- switch (result & 0xff07) {
- case NVME_AER_NOTICE_NS_CHANGED:
- dev_info(ctrl->device, "rescanning\n");
- nvme_queue_scan(ctrl);
- break;
- case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
- break;
- default:
- dev_warn(ctrl->device, "async event result %08x\n", result);
- }
queue_work(nvme_wq, &ctrl->async_event_work);
}
EXPORT_SYMBOL_GPL(nvme_complete_async_event);
@@ -3391,6 +3440,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->queue_count > 1) {
nvme_queue_scan(ctrl);
+ nvme_enable_aen(ctrl);
queue_work(nvme_wq, &ctrl->async_event_work);
nvme_start_queues(ctrl);
}
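
The core.c changes above replace the blanket rescan on a namespace-change AEN with a targeted pass over the Changed Namespace List log page (NVME_LOG_CHANGED_NS), falling back to a full scan when the log overflows. As a rough illustration of the consumer side only, here is a minimal user-space sketch of walking such a log buffer; the 1024-entry size is the 4 KiB page defined by the NVMe specification, le32toh() is the glibc byte-order helper, and revalidate() is a stand-in for nvme_validate_ns().

    #include <endian.h>
    #include <stdint.h>

    #define CHANGED_NS_ENTRIES 1024  /* 4096-byte log page of 32-bit NSIDs */

    /*
     * Returns the number of namespace IDs handled, or -1 when the first
     * entry is 0xffffffff, meaning more namespaces changed than the log
     * can report and the caller must fall back to a full rescan.
     */
    static int walk_changed_ns_log(const uint32_t *log,
                                   void (*revalidate)(uint32_t nsid))
    {
        int i;

        if (le32toh(log[0]) == 0xffffffffu)
            return -1;

        for (i = 0; i < CHANGED_NS_ENTRIES; i++) {
            uint32_t nsid = le32toh(log[i]);

            if (nsid == 0)          /* a zero entry terminates the list */
                break;
            revalidate(nsid);
        }
        return i;
    }

This mirrors why nvme_scan_changed_ns_log() returns a boolean: a false return sends nvme_scan_work() down the ordinary list-based or sequential scan path instead.
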
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 124c458806df..5f5f7067c41d 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -57,7 +57,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
goto out_unlock;
kref_init(&host->ref);
- memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+ strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
@@ -545,71 +545,54 @@ blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
return BLK_STS_OK;
switch (ctrl->state) {
- case NVME_CTRL_DELETING:
- goto reject_io;
-
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
+ case NVME_CTRL_DELETING:
+ /*
+ * This is the case of starting a new or deleting an association
+ * but connectivity was lost before it was fully created or torn
+ * down. We need to error the commands used to initialize the
+ * controller so the reconnect can go into a retry attempt. The
+ * commands should all be marked REQ_FAILFAST_DRIVER, which will
+ * hit the reject path below. Anything else will be queued while
+ * the state settles.
+ */
if (!is_connected)
- /*
- * This is the case of starting a new
- * association but connectivity was lost
- * before it was fully created. We need to
- * error the commands used to initialize the
- * controller so the reconnect can go into a
- * retry attempt. The commands should all be
- * marked REQ_FAILFAST_DRIVER, which will hit
- * the reject path below. Anything else will
- * be queued while the state settles.
- */
- goto reject_or_queue_io;
-
- if ((queue_live &&
- !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) ||
- (!queue_live && blk_rq_is_passthrough(rq) &&
- cmd->common.opcode == nvme_fabrics_command &&
- cmd->fabrics.fctype == nvme_fabrics_type_connect))
- /*
- * If queue is live, allow only commands that
- * are internally generated pass through. These
- * are commands on the admin queue to initialize
- * the controller. This will reject any ioctl
- * admin cmds received while initializing.
- *
- * If the queue is not live, allow only a
- * connect command. This will reject any ioctl
- * admin cmd as well as initialization commands
- * if the controller reverted the queue to non-live.
- */
+ break;
+
+ /*
+ * If queue is live, allow only commands that are internally
+ * generated pass through. These are commands on the admin
+ * queue to initialize the controller. This will reject any
+ * ioctl admin cmds received while initializing.
+ */
+ if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
return BLK_STS_OK;
/*
- * fall-thru to the reject_or_queue_io clause
+ * If the queue is not live, allow only a connect command. This
+ * will reject any ioctl admin cmd as well as initialization
+ * commands if the controller reverted the queue to non-live.
*/
+ if (!queue_live && blk_rq_is_passthrough(rq) &&
+ cmd->common.opcode == nvme_fabrics_command &&
+ cmd->fabrics.fctype == nvme_fabrics_type_connect)
+ return BLK_STS_OK;
break;
-
- /* these cases fall-thru
- * case NVME_CTRL_LIVE:
- * case NVME_CTRL_RESETTING:
- */
default:
break;
}
-reject_or_queue_io:
/*
- * Any other new io is something we're not in a state to send
- * to the device. Default action is to busy it and retry it
- * after the controller state is recovered. However, anything
- * marked for failfast or nvme multipath is immediately failed.
- * Note: commands used to initialize the controller will be
- * marked for failfast.
+ * Any other new io is something we're not in a state to send to the
+ * device. Default action is to busy it and retry it after the
+ * controller state is recovered. However, anything marked for failfast
+ * or nvme multipath is immediately failed. Note: commands used to
+ * initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
-
-reject_io:
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
}
@@ -668,6 +651,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->transport);
opts->transport = p;
break;
case NVMF_OPT_NQN:
@@ -676,6 +660,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->subsysnqn);
opts->subsysnqn = p;
nqnlen = strlen(opts->subsysnqn);
if (nqnlen >= NVMF_NQN_SIZE) {
@@ -687,10 +672,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->discovery_nqn =
!(strcmp(opts->subsysnqn,
NVME_DISC_SUBSYS_NAME));
- if (opts->discovery_nqn) {
- opts->kato = 0;
- opts->nr_io_queues = 0;
- }
break;
case NVMF_OPT_TRADDR:
p = match_strdup(args);
@@ -698,6 +679,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->traddr);
opts->traddr = p;
break;
case NVMF_OPT_TRSVCID:
@@ -706,6 +688,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->trsvcid);
opts->trsvcid = p;
break;
case NVMF_OPT_QUEUE_SIZE:
@@ -792,6 +775,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -EINVAL;
goto out;
}
+ nvmf_host_put(opts->host);
opts->host = nvmf_host_add(p);
kfree(p);
if (!opts->host) {
@@ -817,6 +801,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
+ kfree(opts->host_traddr);
opts->host_traddr = p;
break;
case NVMF_OPT_HOST_ID:
@@ -845,6 +830,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
}
+ if (opts->discovery_nqn) {
+ opts->kato = 0;
+ opts->nr_io_queues = 0;
+ opts->duplicate_connect = true;
+ }
if (ctrl_loss_tmo < 0)
opts->max_reconnects = -1;
else
@@ -977,16 +967,6 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_module_put;
}
- if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
- dev_warn(ctrl->device,
- "controller returned incorrect NQN: \"%s\".\n",
- ctrl->subsys->subnqn);
- module_put(ops->module);
- up_read(&nvmf_transports_rwsem);
- nvme_delete_ctrl_sync(ctrl);
- return ERR_PTR(-EINVAL);
- }
-
module_put(ops->module);
up_read(&nvmf_transports_rwsem);
return ctrl;
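
Several hunks above add a kfree() of the previous value before storing a newly parsed string, so that an option repeated on the connect string replaces the earlier occurrence instead of leaking it (kfree(NULL) is a no-op, so the first occurrence needs no special case). A small user-space sketch of the same pattern; set_string_opt() and its use are purely illustrative.

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    /* Replace *opt with a copy of val, dropping any earlier value. */
    static int set_string_opt(char **opt, const char *val)
    {
        char *p = strdup(val);      /* match_strdup() in the driver */

        if (!p)
            return -ENOMEM;
        free(*opt);                 /* free(NULL), like kfree(NULL), is a no-op */
        *opt = p;
        return 0;
    }
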
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index ef46c915b7b5..0cf0460a5c92 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -139,7 +139,9 @@ static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
- if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
+ if (ctrl->state == NVME_CTRL_DELETING ||
+ ctrl->state == NVME_CTRL_DEAD ||
+ strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
return false;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6cb26bcf6ec0..0bad65803271 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1686,16 +1686,6 @@ done:
goto check_error;
}
- /*
- * Force failures of commands if we're killing the controller
- * or have an error on a command used to create an new association
- */
- if (status &&
- (blk_queue_dying(rq->q) ||
- ctrl->ctrl.state == NVME_CTRL_NEW ||
- ctrl->ctrl.state == NVME_CTRL_CONNECTING))
- status |= cpu_to_le16(NVME_SC_DNR << 1);
-
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
nvme_end_request(rq, status, result);
@@ -2403,9 +2393,6 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
- if (!blk_mq_request_started(req))
- return;
-
__nvme_fc_abort_op(ctrl, op);
}
@@ -3284,6 +3271,8 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
}
spin_unlock_irqrestore(&nvme_fc_lock, flags);
+ pr_warn("%s: %s - %s combination not found\n",
+ __func__, opts->traddr, opts->host_traddr);
return ERR_PTR(-ENOENT);
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 956e0b8e9c4d..d7b664ae5923 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -15,10 +15,32 @@
#include "nvme.h"
static bool multipath = true;
-module_param(multipath, bool, 0644);
+module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
+/*
+ * If multipathing is enabled we need to always use the subsystem instance
+ * number for numbering our devices to avoid conflicts between subsystems that
+ * have multiple controllers and thus use the multipath-aware subsystem node
+ * and those that have a single controller and use the controller node
+ * directly.
+ */
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags)
+{
+ if (!multipath) {
+ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+ } else if (ns->head->disk) {
+ sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
+ ctrl->cntlid, ns->head->instance);
+ *flags = GENHD_FL_HIDDEN;
+ } else {
+ sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
+ ns->head->instance);
+ }
+}
+
void nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
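
For concreteness, with hypothetical instance numbers (subsystem and controller instance 0, controller ID 1, namespace head 1), the three branches of nvme_set_disk_name() above yield:

    nvme0n1      multipath disabled: named after the controller instance
    nvme0c1n1    multipath enabled, shared namespace: hidden per-path node
    nvme0n1      multipath enabled, single visible node: named after the subsystem instance
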
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 061fecfd44f5..de24fe77c80b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -22,6 +22,7 @@
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
+#include <linux/rcupdate.h>
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
@@ -84,6 +85,11 @@ enum nvme_quirks {
* Supports the LighNVM command set if indicated in vs[1].
*/
NVME_QUIRK_LIGHTNVM = (1 << 6),
+
+ /*
+ * Set MEDIUM priority on SQ creation
+ */
+ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
};
/*
@@ -175,6 +181,7 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ u32 oaes;
u32 aen_result;
unsigned int shutdown_timeout;
unsigned int kato;
@@ -187,6 +194,8 @@ struct nvme_ctrl {
struct delayed_work ka_work;
struct nvme_command ka_cmd;
struct work_struct fw_act_work;
+#define EVENT_NS_CHANGED (1 << 0)
+ unsigned long events;
/* Power saving configuration */
u64 ps_max_latency_us;
@@ -393,14 +402,13 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
-void nvme_queue_scan(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send);
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
- union nvme_result *res);
+ volatile union nvme_result *res);
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
@@ -436,6 +444,8 @@ extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
+void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
@@ -447,7 +457,7 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
- if (head && ns == srcu_dereference(head->current_path, &head->srcu))
+ if (head && ns == rcu_access_pointer(head->current_path))
rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
@@ -461,6 +471,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
}
#else
+/*
+ * Without the multipath code enabled, multiple controller per subsystems are
+ * visible as devices and thus we cannot use the subsystem instance.
+ */
+static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags)
+{
+ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+}
+
static inline void nvme_failover_req(struct request *req)
{
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fbc71fac6f1e..e526437bacbf 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -13,6 +13,7 @@
*/
#include <linux/aer.h>
+#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
@@ -68,7 +69,6 @@ MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
struct nvme_dev;
struct nvme_queue;
-static void nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
/*
@@ -147,9 +147,10 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
struct nvme_queue {
struct device *q_dmadev;
struct nvme_dev *dev;
- spinlock_t q_lock;
+ spinlock_t sq_lock;
struct nvme_command *sq_cmds;
struct nvme_command __iomem *sq_cmds_io;
+ spinlock_t cq_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
@@ -159,9 +160,9 @@ struct nvme_queue {
s16 cq_vector;
u16 sq_tail;
u16 cq_head;
+ u16 last_cq_head;
u16 qid;
u8 cq_phase;
- u8 cqe_seen;
u32 *dbbuf_sq_db;
u32 *dbbuf_cq_db;
u32 *dbbuf_sq_ei;
@@ -420,28 +421,25 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
}
/**
- * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
* @cmd: The command to send
- *
- * Safe to use from interrupt context
*/
-static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
- struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
- u16 tail = nvmeq->sq_tail;
-
+ spin_lock(&nvmeq->sq_lock);
if (nvmeq->sq_cmds_io)
- memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+ memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
+ sizeof(*cmd));
else
- memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
- if (++tail == nvmeq->q_depth)
- tail = 0;
- if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
- nvmeq->dbbuf_sq_ei))
- writel(tail, nvmeq->q_db);
- nvmeq->sq_tail = tail;
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+ nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+ spin_unlock(&nvmeq->sq_lock);
}
static void **nvme_pci_iod_list(struct request *req)
@@ -872,6 +870,13 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_command cmnd;
blk_status_t ret;
+ /*
+ * We should not need to do this, but we're still using this to
+ * ensure we can drain requests on a dying queue.
+ */
+ if (unlikely(nvmeq->cq_vector < 0))
+ return BLK_STS_IOERR;
+
ret = nvme_setup_cmd(ns, req, &cmnd);
if (ret)
return ret;
@@ -887,16 +892,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
}
blk_mq_start_request(req);
-
- spin_lock_irq(&nvmeq->q_lock);
- if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_STS_IOERR;
- spin_unlock_irq(&nvmeq->q_lock);
- goto out_cleanup_iod;
- }
- __nvme_submit_cmd(nvmeq, &cmnd);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ nvme_submit_cmd(nvmeq, &cmnd);
return BLK_STS_OK;
out_cleanup_iod:
nvme_free_iod(dev, req);
@@ -914,10 +910,10 @@ static void nvme_pci_complete_rq(struct request *req)
}
/* We read the CQE phase first to check if the rest of the entry is valid */
-static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
- u16 phase)
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
- return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+ return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+ nvmeq->cq_phase;
}
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@@ -931,9 +927,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
}
}
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
- struct nvme_completion *cqe)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
+ volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -956,83 +952,87 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
return;
}
- nvmeq->cqe_seen = 1;
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
nvme_end_request(req, cqe->status, cqe->result);
}
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
- struct nvme_completion *cqe)
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
- *cqe = nvmeq->cqes[nvmeq->cq_head];
+ while (start != end) {
+ nvme_handle_cqe(nvmeq, start);
+ if (++start == nvmeq->q_depth)
+ start = 0;
+ }
+}
- if (++nvmeq->cq_head == nvmeq->q_depth) {
- nvmeq->cq_head = 0;
- nvmeq->cq_phase = !nvmeq->cq_phase;
- }
- return true;
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
}
- return false;
}
-static void nvme_process_cq(struct nvme_queue *nvmeq)
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+ u16 *end, int tag)
{
- struct nvme_completion cqe;
- int consumed = 0;
+ bool found = false;
- while (nvme_read_cqe(nvmeq, &cqe)) {
- nvme_handle_cqe(nvmeq, &cqe);
- consumed++;
+ *start = nvmeq->cq_head;
+ while (!found && nvme_cqe_pending(nvmeq)) {
+ if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+ found = true;
+ nvme_update_cq_head(nvmeq);
}
+ *end = nvmeq->cq_head;
- if (consumed)
+ if (*start != *end)
nvme_ring_cq_doorbell(nvmeq);
+ return found;
}
static irqreturn_t nvme_irq(int irq, void *data)
{
- irqreturn_t result;
struct nvme_queue *nvmeq = data;
- spin_lock(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
- nvmeq->cqe_seen = 0;
- spin_unlock(&nvmeq->q_lock);
- return result;
+ irqreturn_t ret = IRQ_NONE;
+ u16 start, end;
+
+ spin_lock(&nvmeq->cq_lock);
+ if (nvmeq->cq_head != nvmeq->last_cq_head)
+ ret = IRQ_HANDLED;
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ nvmeq->last_cq_head = nvmeq->cq_head;
+ spin_unlock(&nvmeq->cq_lock);
+
+ if (start != end) {
+ nvme_complete_cqes(nvmeq, start, end);
+ return IRQ_HANDLED;
+ }
+
+ return ret;
}
static irqreturn_t nvme_irq_check(int irq, void *data)
{
struct nvme_queue *nvmeq = data;
- if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ if (nvme_cqe_pending(nvmeq))
return IRQ_WAKE_THREAD;
return IRQ_NONE;
}
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
- struct nvme_completion cqe;
- int found = 0, consumed = 0;
+ u16 start, end;
+ bool found;
- if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+ if (!nvme_cqe_pending(nvmeq))
return 0;
- spin_lock_irq(&nvmeq->q_lock);
- while (nvme_read_cqe(nvmeq, &cqe)) {
- nvme_handle_cqe(nvmeq, &cqe);
- consumed++;
-
- if (tag == cqe.command_id) {
- found = 1;
- break;
- }
- }
-
- if (consumed)
- nvme_ring_cq_doorbell(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+ spin_unlock_irq(&nvmeq->cq_lock);
+ nvme_complete_cqes(nvmeq, start, end);
return found;
}
@@ -1052,10 +1052,7 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
memset(&c, 0, sizeof(c));
c.common.opcode = nvme_admin_async_event;
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
-
- spin_lock_irq(&nvmeq->q_lock);
- __nvme_submit_cmd(nvmeq, &c);
- spin_unlock_irq(&nvmeq->q_lock);
+ nvme_submit_cmd(nvmeq, &c);
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1070,7 +1067,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
- struct nvme_queue *nvmeq)
+ struct nvme_queue *nvmeq, s16 vector)
{
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
@@ -1085,7 +1082,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cqid = cpu_to_le16(qid);
c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_cq.cq_flags = cpu_to_le16(flags);
- c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+ c.create_cq.irq_vector = cpu_to_le16(vector);
return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
@@ -1093,10 +1090,19 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
struct nvme_queue *nvmeq)
{
+ struct nvme_ctrl *ctrl = &dev->ctrl;
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG;
/*
+ * Some drives have a bug that auto-enables WRRU if MEDIUM isn't
+ * set. Since URGENT priority is zeroes, it makes all queues
+ * URGENT.
+ */
+ if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
+ flags |= NVME_SQ_PRIO_MEDIUM;
+
+ /*
* Note: we (ab)use the fact that the prp fields survive if no data
* is attached to the request.
*/
@@ -1199,7 +1205,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_warn_reset(dev, csts);
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
/*
@@ -1209,24 +1215,24 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid);
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
/*
* Shutdown immediately if controller times out while starting. The
* reset work will see the pci device disabled when it gets the forced
* cancellation error. All outstanding requests are completed on
- * shutdown, so we return BLK_EH_HANDLED.
+ * shutdown, so we return BLK_EH_DONE.
*/
switch (dev->ctrl.state) {
case NVME_CTRL_CONNECTING:
case NVME_CTRL_RESETTING:
- dev_warn(dev->ctrl.device,
+ dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
default:
break;
}
@@ -1243,12 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
- /*
- * Mark the request as handled, since the inline shutdown
- * forces all outstanding requests to complete.
- */
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1312,15 +1314,21 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
int vector;
- spin_lock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
if (nvmeq->cq_vector == -1) {
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
return 1;
}
vector = nvmeq->cq_vector;
nvmeq->dev->online_queues--;
nvmeq->cq_vector = -1;
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
+
+ /*
+ * Ensure that nvme_queue_rq() sees it ->cq_vector == -1 without
+ * having to grab the lock.
+ */
+ mb();
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
@@ -1333,15 +1341,18 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
+ u16 start, end;
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
else
nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
- spin_lock_irq(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ spin_unlock_irq(&nvmeq->cq_lock);
+
+ nvme_complete_cqes(nvmeq, start, end);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -1399,7 +1410,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
- spin_lock_init(&nvmeq->q_lock);
+ spin_lock_init(&nvmeq->sq_lock);
+ spin_lock_init(&nvmeq->cq_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -1435,7 +1447,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
struct nvme_dev *dev = nvmeq->dev;
- spin_lock_irq(&nvmeq->q_lock);
+ spin_lock_irq(&nvmeq->cq_lock);
nvmeq->sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
@@ -1443,13 +1455,14 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
- spin_unlock_irq(&nvmeq->q_lock);
+ spin_unlock_irq(&nvmeq->cq_lock);
}
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
struct nvme_dev *dev = nvmeq->dev;
int result;
+ s16 vector;
if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
@@ -1462,15 +1475,21 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
- nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid;
- result = adapter_alloc_cq(dev, qid, nvmeq);
+ vector = dev->num_vecs == 1 ? 0 : qid;
+ result = adapter_alloc_cq(dev, qid, nvmeq, vector);
if (result < 0)
- goto release_vector;
+ goto out;
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
goto release_cq;
+ /*
+ * Set cq_vector after alloc cq/sq, otherwise nvme_suspend_queue will
+ * invoke free_irq for it and cause a 'Trying to free already-free IRQ
+ * xxx' warning if the create CQ/SQ command times out.
+ */
+ nvmeq->cq_vector = vector;
nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
@@ -1478,13 +1497,13 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
return result;
- release_sq:
+release_sq:
+ nvmeq->cq_vector = -1;
dev->online_queues--;
adapter_delete_sq(dev, qid);
- release_cq:
+release_cq:
adapter_delete_cq(dev, qid);
- release_vector:
- nvmeq->cq_vector = -1;
+out:
return result;
}
@@ -1988,19 +2007,22 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
+ u16 start, end;
if (!error) {
unsigned long flags;
/*
- * We might be called with the AQ q_lock held
- * and the I/O queue q_lock should always
+ * We might be called with the AQ cq_lock held
+ * and the I/O queue cq_lock should always
* nest inside the AQ one.
*/
- spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
+ spin_lock_irqsave_nested(&nvmeq->cq_lock, flags,
SINGLE_DEPTH_NESTING);
- nvme_process_cq(nvmeq);
- spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
+
+ nvme_complete_cqes(nvmeq, start, end);
}
nvme_del_queue_end(req, error);
@@ -2488,6 +2510,15 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0;
}
+static void nvme_async_probe(void *data, async_cookie_t cookie)
+{
+ struct nvme_dev *dev = data;
+
+ nvme_reset_ctrl_sync(&dev->ctrl);
+ flush_work(&dev->ctrl.scan_work);
+ nvme_put_ctrl(&dev->ctrl);
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
@@ -2532,7 +2563,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
- nvme_reset_ctrl(&dev->ctrl);
+ nvme_get_ctrl(&dev->ctrl);
+ async_schedule(nvme_async_probe, dev);
return 0;
@@ -2676,6 +2708,9 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
static void nvme_error_resume(struct pci_dev *pdev)
{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ flush_work(&dev->ctrl.reset_work);
pci_cleanup_aer_uncorrect_error_status(pdev);
}
@@ -2701,9 +2736,12 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
- .driver_data = NVME_QUIRK_NO_DEEPEST_PS },
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_MEDIUM_PRIO_SQ },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
+ { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
@@ -2718,6 +2756,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
+ { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
+ .driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
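
The pci.c rework above splits the old q_lock into sq_lock and cq_lock and turns completion handling into two phases: nvme_process_cq() only advances cq_head under cq_lock and reports the consumed [start, end) window, and nvme_complete_cqes() then runs the block-layer completions with the lock dropped. Below is a self-contained, simplified sketch of that reap-then-complete pattern; the names are illustrative, the valid flag stands in for the phase-bit test in nvme_cqe_pending(), and phase wrapping and the doorbell write are omitted.

    #include <stdbool.h>
    #include <stdio.h>

    #define CQ_DEPTH 8              /* illustrative; the driver uses q_depth */

    struct cqe {
        unsigned int command_id;
        bool valid;                 /* stand-in for the NVMe phase bit */
    };

    struct cq {
        struct cqe entries[CQ_DEPTH];
        unsigned int head;
    };

    /* Phase 1 -- under cq_lock in the driver: advance the head past every
     * pending entry and record the [start, end) window.  Nothing slow
     * happens here, so the lock is held only briefly.  As with NVMe
     * queues, the ring is assumed never to be completely full, so the
     * walk always stops at an invalid entry. */
    static void cq_collect(struct cq *cq, unsigned int *start, unsigned int *end)
    {
        *start = cq->head;
        while (cq->entries[cq->head].valid) {
            if (++cq->head == CQ_DEPTH)
                cq->head = 0;
        }
        *end = cq->head;
    }

    /* Phase 2 -- after dropping cq_lock: run the completion handlers for
     * the collected window (blk_mq completion in the driver). */
    static void cq_complete(struct cq *cq, unsigned int start, unsigned int end)
    {
        while (start != end) {
            cq->entries[start].valid = false;
            printf("completed command %u\n", cq->entries[start].command_id);
            if (++start == CQ_DEPTH)
                start = 0;
        }
    }

    int main(void)
    {
        struct cq cq = { .head = 0 };
        unsigned int start, end;

        cq.entries[0] = (struct cqe){ .command_id = 7, .valid = true };
        cq.entries[1] = (struct cqe){ .command_id = 9, .valid = true };

        cq_collect(&cq, &start, &end);      /* would hold cq_lock */
        cq_complete(&cq, start, end);       /* lock already dropped */
        return 0;
    }

Keeping the completions outside the spinlock is what lets the interrupt handler and the __nvme_poll() path share nvme_process_cq() without holding a lock across arbitrary blk-mq completion work.
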
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438a8763..7b3f08410430 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -778,7 +778,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (error) {
dev_err(ctrl->ctrl.device,
"prop_get NVME_REG_CAP failed\n");
- goto out_cleanup_queue;
+ goto out_stop_queue;
}
ctrl->ctrl.sqsize =
@@ -786,23 +786,25 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
ctrl->ctrl.max_hw_sectors =
(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
error = nvme_init_identify(&ctrl->ctrl);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
&ctrl->async_event_sqe, sizeof(struct nvme_command),
DMA_TO_DEVICE);
if (error)
- goto out_cleanup_queue;
+ goto out_stop_queue;
return 0;
+out_stop_queue:
+ nvme_rdma_stop_queue(&ctrl->queues[0]);
out_cleanup_queue:
if (new)
blk_cleanup_queue(ctrl->ctrl.admin_q);
@@ -1598,7 +1600,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
/* fail with DNR on cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index ea91fccd1bc0..01390f0e1671 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -148,8 +148,8 @@ TRACE_EVENT(nvme_complete_rq,
__entry->flags = nvme_req(req)->flags;
__entry->status = nvme_req(req)->status;
),
- TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u",
- __entry->cid, __entry->qid, __entry->result,
+ TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
+ __entry->qid, __entry->cid, __entry->result,
__entry->retries, __entry->flags, __entry->status)
);
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 5f4f8b16685f..3c7b61ddb0d1 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
config NVME_TARGET_RDMA
tristate "NVMe over Fabrics RDMA target support"
- depends on INFINIBAND
+ depends on INFINIBAND && INFINIBAND_ADDR_TRANS
depends on NVME_TARGET
select SGL_ALLOC
help
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 488250189c99..8118c93391c6 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA) += nvmet-rdma.o
obj-$(CONFIG_NVME_TARGET_FC) += nvmet-fc.o
obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
-nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
- discovery.o
+nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
+ discovery.o io-cmd-file.o io-cmd-bdev.o
nvme-loop-y += loop.o
nvmet-rdma-y += rdma.o
nvmet-fc-y += fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fcc0d4d..ead8fbe6922e 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -32,6 +32,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
return len;
}
+static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
+{
+ nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+}
+
static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
struct nvme_smart_log *slog)
{
@@ -45,6 +50,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
return NVME_SC_INVALID_NS;
}
+ /* we don't have the right data for file backed ns */
+ if (!ns->bdev)
+ goto out;
+
host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
@@ -54,6 +63,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
put_unaligned_le64(host_writes, &slog->host_writes[0]);
put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+out:
nvmet_put_namespace(ns);
return NVME_SC_SUCCESS;
@@ -71,6 +81,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
rcu_read_lock();
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+ /* we don't have the right data for file backed ns */
+ if (!ns->bdev)
+ continue;
host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
data_units_read +=
part_stat_read(ns->bdev->bd_part, sectors[READ]);
@@ -89,74 +102,50 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
return NVME_SC_SUCCESS;
}
-static u16 nvmet_get_smart_log(struct nvmet_req *req,
- struct nvme_smart_log *slog)
+static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
- u16 status;
+ struct nvme_smart_log *log;
+ u16 status = NVME_SC_INTERNAL;
+
+ if (req->data_len != sizeof(*log))
+ goto out;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL);
+ if (!log)
+ goto out;
- WARN_ON(req == NULL || slog == NULL);
if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
- status = nvmet_get_smart_log_all(req, slog);
+ status = nvmet_get_smart_log_all(req, log);
else
- status = nvmet_get_smart_log_nsid(req, slog);
- return status;
+ status = nvmet_get_smart_log_nsid(req, log);
+ if (status)
+ goto out;
+
+ status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out:
+ nvmet_req_complete(req, status);
}
-static void nvmet_execute_get_log_page(struct nvmet_req *req)
+static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
{
- struct nvme_smart_log *smart_log;
- size_t data_len = nvmet_get_log_page_len(req->cmd);
- void *buf;
- u16 status = 0;
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u16 status = NVME_SC_INTERNAL;
+ size_t len;
- buf = kzalloc(data_len, GFP_KERNEL);
- if (!buf) {
- status = NVME_SC_INTERNAL;
+ if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
goto out;
- }
- switch (req->cmd->get_log_page.lid) {
- case NVME_LOG_ERROR:
- /*
- * We currently never set the More bit in the status field,
- * so all error log entries are invalid and can be zeroed out.
- * This is called a minum viable implementation (TM) of this
- * mandatory log page.
- */
- break;
- case NVME_LOG_SMART:
- /*
- * XXX: fill out actual smart log
- *
- * We might have a hard time coming up with useful values for
- * many of the fields, and even when we have useful data
- * available (e.g. units or commands read/written) those aren't
- * persistent over power loss.
- */
- if (data_len != sizeof(*smart_log)) {
- status = NVME_SC_INTERNAL;
- goto err;
- }
- smart_log = buf;
- status = nvmet_get_smart_log(req, smart_log);
- if (status)
- goto err;
- break;
- case NVME_LOG_FW_SLOT:
- /*
- * We only support a single firmware slot which always is
- * active, so we can zero out the whole firmware slot log and
- * still claim to fully implement this mandatory log page.
- */
- break;
- default:
- BUG();
- }
-
- status = nvmet_copy_to_sgl(req, 0, buf, data_len);
-
-err:
- kfree(buf);
+ mutex_lock(&ctrl->lock);
+ if (ctrl->nr_changed_ns == U32_MAX)
+ len = sizeof(__le32);
+ else
+ len = ctrl->nr_changed_ns * sizeof(__le32);
+ status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
+ if (!status)
+ status = nvmet_zero_sgl(req, len, req->data_len - len);
+ ctrl->nr_changed_ns = 0;
+ clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+ mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
}
@@ -201,7 +190,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->ver = cpu_to_le32(ctrl->subsys->ver);
/* XXX: figure out what to do about RTD3R/RTD3 */
- id->oaes = cpu_to_le32(1 << 8);
+ id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
id->ctratt = cpu_to_le32(1 << 0);
id->oacs = 0;
@@ -447,6 +436,16 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_set_result(req, req->sq->ctrl->kato);
break;
+ case NVME_FEAT_ASYNC_EVENT:
+ val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
+ if (val32 & ~NVMET_AEN_CFG_ALL) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ break;
+ }
+
+ WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
+ nvmet_set_result(req, val32);
+ break;
case NVME_FEAT_HOST_ID:
status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
break;
@@ -485,9 +484,10 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
break;
case NVME_FEAT_WRITE_ATOMIC:
break;
+#endif
case NVME_FEAT_ASYNC_EVENT:
+ nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
break;
-#endif
case NVME_FEAT_VOLATILE_WC:
nvmet_set_result(req, 1);
break;
@@ -548,8 +548,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
u16 ret;
- req->ns = NULL;
-
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
@@ -560,9 +558,28 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
switch (cmd->get_log_page.lid) {
case NVME_LOG_ERROR:
+ /*
+ * We currently never set the More bit in the status
+ * field, so all error log entries are invalid and can
+ * be zeroed out. This is called a minum viable
+ * implementation (TM) of this mandatory log page.
+ */
+ req->execute = nvmet_execute_get_log_page_noop;
+ return 0;
case NVME_LOG_SMART:
+ req->execute = nvmet_execute_get_log_page_smart;
+ return 0;
case NVME_LOG_FW_SLOT:
- req->execute = nvmet_execute_get_log_page;
+ /*
+ * We only support a single firmware slot which always
+ * is active, so we can zero out the whole firmware slot
+ * log and still claim to fully implement this mandatory
+ * log page.
+ */
+ req->execute = nvmet_execute_get_log_page_noop;
+ return 0;
+ case NVME_LOG_CHANGED_NS:
+ req->execute = nvmet_execute_get_log_changed_ns;
return 0;
}
break;
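
On the target side, nvmet_execute_get_log_changed_ns() above serves the same log page: it copies the recorded namespace IDs (or a single 0xffffffff entry when nr_changed_ns was saturated to U32_MAX because the list overflowed) and zero-fills the remainder of the 4 KiB page via nvmet_zero_sgl(). A user-space sketch of just that sizing logic, with endianness conversions left out for brevity:

    #include <stdint.h>
    #include <string.h>

    #define CHANGED_NS_ENTRIES 1024

    static void build_changed_ns_log(uint32_t *page,  /* 1024 entries */
                                     const uint32_t *changed,
                                     uint32_t nr_changed)
    {
        size_t copied;

        if (nr_changed > CHANGED_NS_ENTRIES) {
            /* overflowed: report only "too many to list" */
            memset(page, 0, CHANGED_NS_ENTRIES * sizeof(*page));
            page[0] = 0xffffffffu;
            return;
        }
        copied = (size_t)nr_changed * sizeof(*page);
        memcpy(page, changed, copied);
        memset((char *)page + copied, 0,
               CHANGED_NS_ENTRIES * sizeof(*page) - copied);
    }
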
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95424f172fd..a03da764ecae 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -57,6 +57,13 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
return 0;
}
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
+{
+ if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
+ return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+ return 0;
+}
+
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
struct nvmet_ns *ns;
@@ -137,6 +144,51 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
schedule_work(&ctrl->async_event_work);
}
+static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
+{
+ if (!(READ_ONCE(ctrl->aen_enabled) & aen))
+ return true;
+ return test_and_set_bit(aen, &ctrl->aen_masked);
+}
+
+static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
+{
+ u32 i;
+
+ mutex_lock(&ctrl->lock);
+ if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
+ goto out_unlock;
+
+ for (i = 0; i < ctrl->nr_changed_ns; i++) {
+ if (ctrl->changed_ns_list[i] == nsid)
+ goto out_unlock;
+ }
+
+ if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
+ ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
+ ctrl->nr_changed_ns = U32_MAX;
+ goto out_unlock;
+ }
+
+ ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
+out_unlock:
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
+{
+ struct nvmet_ctrl *ctrl;
+
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
+ if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
+ continue;
+ nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+ NVME_AER_NOTICE_NS_CHANGED,
+ NVME_LOG_CHANGED_NS);
+ }
+}
+
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
@@ -271,33 +323,31 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
percpu_ref_put(&ns->ref);
}
+static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
+{
+ nvmet_bdev_ns_disable(ns);
+ nvmet_file_ns_disable(ns);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- struct nvmet_ctrl *ctrl;
int ret = 0;
mutex_lock(&subsys->lock);
if (ns->enabled)
goto out_unlock;
- ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
- NULL);
- if (IS_ERR(ns->bdev)) {
- pr_err("failed to open block device %s: (%ld)\n",
- ns->device_path, PTR_ERR(ns->bdev));
- ret = PTR_ERR(ns->bdev);
- ns->bdev = NULL;
+ ret = nvmet_bdev_ns_enable(ns);
+ if (ret)
+ ret = nvmet_file_ns_enable(ns);
+ if (ret)
goto out_unlock;
- }
-
- ns->size = i_size_read(ns->bdev->bd_inode);
- ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
0, GFP_KERNEL);
if (ret)
- goto out_blkdev_put;
+ goto out_dev_put;
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = ns->nsid;
@@ -320,24 +370,20 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_add_tail_rcu(&ns->dev_link, &old->dev_link);
}
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
+ nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
ret = 0;
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
-out_blkdev_put:
- blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
- ns->bdev = NULL;
+out_dev_put:
+ nvmet_ns_dev_disable(ns);
goto out_unlock;
}
void nvmet_ns_disable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- struct nvmet_ctrl *ctrl;
mutex_lock(&subsys->lock);
if (!ns->enabled)
@@ -363,11 +409,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
- nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
- if (ns->bdev)
- blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+ nvmet_ns_changed(subsys, ns->nsid);
+ nvmet_ns_dev_disable(ns);
out_unlock:
mutex_unlock(&subsys->lock);
}
@@ -499,6 +542,25 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
+static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 ret;
+
+ ret = nvmet_check_ctrl_status(req, cmd);
+ if (unlikely(ret))
+ return ret;
+
+ req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+ if (unlikely(!req->ns))
+ return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+ if (req->ns->file)
+ return nvmet_file_parse_io_cmd(req);
+ else
+ return nvmet_bdev_parse_io_cmd(req);
+}
+
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
@@ -710,15 +772,14 @@ out:
u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
- pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
+ pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
- pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
+ pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
cmd->common.opcode, req->sq->qid);
- req->ns = NULL;
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
return 0;
@@ -809,12 +870,18 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
+ WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
+
+ ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
+ sizeof(__le32), GFP_KERNEL);
+ if (!ctrl->changed_ns_list)
+ goto out_free_ctrl;
ctrl->cqs = kcalloc(subsys->max_qid + 1,
sizeof(struct nvmet_cq *),
GFP_KERNEL);
if (!ctrl->cqs)
- goto out_free_ctrl;
+ goto out_free_changed_ns_list;
ctrl->sqs = kcalloc(subsys->max_qid + 1,
sizeof(struct nvmet_sq *),
@@ -872,6 +939,8 @@ out_free_sqs:
kfree(ctrl->sqs);
out_free_cqs:
kfree(ctrl->cqs);
+out_free_changed_ns_list:
+ kfree(ctrl->changed_ns_list);
out_free_ctrl:
kfree(ctrl);
out_put_subsystem:
@@ -898,6 +967,7 @@ static void nvmet_ctrl_free(struct kref *ref)
kfree(ctrl->sqs);
kfree(ctrl->cqs);
+ kfree(ctrl->changed_ns_list);
kfree(ctrl);
nvmet_subsys_put(subsys);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e0a496..08656b849bd6 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -187,8 +187,6 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("got cmd %d while not ready\n",
cmd->common.opcode);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 19e9e42ae943..d84ae004cb85 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -77,8 +77,6 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
req->data_len = 0;
@@ -242,8 +240,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- req->ns = NULL;
-
if (cmd->common.opcode != nvme_fabrics_command) {
pr_err("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 33ee8d3145f8..408279cb6f2c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -31,7 +31,7 @@
/* *************************** Data Structures/Defines ****************** */
-#define NVMET_LS_CTX_COUNT 4
+#define NVMET_LS_CTX_COUNT 256
/* for this implementation, assume small single frame rqst/rsp */
#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd-bdev.c
index cd2344179673..e0b0f7df70c2 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -16,6 +16,34 @@
#include <linux/module.h>
#include "nvmet.h"
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+
+ ns->bdev = blkdev_get_by_path(ns->device_path,
+ FMODE_READ | FMODE_WRITE, NULL);
+ if (IS_ERR(ns->bdev)) {
+ ret = PTR_ERR(ns->bdev);
+ if (ret != -ENOTBLK) {
+ pr_err("failed to open block device %s: (%ld)\n",
+ ns->device_path, PTR_ERR(ns->bdev));
+ }
+ ns->bdev = NULL;
+ return ret;
+ }
+ ns->size = i_size_read(ns->bdev->bd_inode);
+ ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+ return 0;
+}
+
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+ if (ns->bdev) {
+ blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+ ns->bdev = NULL;
+ }
+}
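Whatever the backend, enable only has to record two properties: the namespace size in bytes and the logical block size expressed as a shift. For a block device those come from the inode size and bdev_logical_block_size(); a userspace program can read the same pair with the BLKGETSIZE64 and BLKSSZGET ioctls, as in this hedged example (the device path is an assumption):

/* Userspace analogue of the size/block-size probing done in
 * nvmet_bdev_ns_enable(), using the standard block-device ioctls.
 * The device path is illustrative; run against any block device you own.
 */
#include <fcntl.h>
#include <linux/fs.h>		/* BLKGETSIZE64, BLKSSZGET */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/dev/nvme0n1";	/* example path */
	uint64_t size = 0;
	int lbs = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	ioctl(fd, BLKGETSIZE64, &size);		/* bytes, cf. i_size_read() */
	ioctl(fd, BLKSSZGET, &lbs);		/* logical block size in bytes */
	printf("%s: %llu bytes, %d-byte blocks\n",
	       path, (unsigned long long)size, lbs);
	close(fd);
	return 0;
}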
+
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
@@ -23,20 +51,14 @@ static void nvmet_bio_done(struct bio *bio)
nvmet_req_complete(req,
bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
- if (bio != &req->inline_bio)
+ if (bio != &req->b.inline_bio)
bio_put(bio);
}
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
-{
- return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
- req->ns->blksize_shift;
-}
-
-static void nvmet_execute_rw(struct nvmet_req *req)
+static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
- struct bio *bio = &req->inline_bio;
+ struct bio *bio = &req->b.inline_bio;
struct scatterlist *sg;
sector_t sector;
blk_qc_t cookie;
@@ -89,9 +111,9 @@ static void nvmet_execute_rw(struct nvmet_req *req)
blk_poll(bdev_get_queue(req->ns->bdev), cookie);
}
-static void nvmet_execute_flush(struct nvmet_req *req)
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
- struct bio *bio = &req->inline_bio;
+ struct bio *bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
@@ -102,7 +124,7 @@ static void nvmet_execute_flush(struct nvmet_req *req)
submit_bio(bio);
}
-static u16 nvmet_discard_range(struct nvmet_ns *ns,
+static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
struct nvme_dsm_range *range, struct bio **bio)
{
int ret;
@@ -116,7 +138,7 @@ static u16 nvmet_discard_range(struct nvmet_ns *ns,
return 0;
}
-static void nvmet_execute_discard(struct nvmet_req *req)
+static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
struct nvme_dsm_range range;
struct bio *bio = NULL;
@@ -129,7 +151,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
if (status)
break;
- status = nvmet_discard_range(req->ns, &range, &bio);
+ status = nvmet_bdev_discard_range(req->ns, &range, &bio);
if (status)
break;
}
@@ -148,11 +170,11 @@ static void nvmet_execute_discard(struct nvmet_req *req)
}
}
-static void nvmet_execute_dsm(struct nvmet_req *req)
+static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
- nvmet_execute_discard(req);
+ nvmet_bdev_execute_discard(req);
return;
case NVME_DSMGMT_IDR:
case NVME_DSMGMT_IDW:
@@ -163,7 +185,7 @@ static void nvmet_execute_dsm(struct nvmet_req *req)
}
}
-static void nvmet_execute_write_zeroes(struct nvmet_req *req)
+static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
struct bio *bio = NULL;
@@ -189,38 +211,27 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
}
}
-u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
- u16 ret;
-
- ret = nvmet_check_ctrl_status(req, cmd);
- if (unlikely(ret)) {
- req->ns = NULL;
- return ret;
- }
-
- req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
- if (unlikely(!req->ns))
- return NVME_SC_INVALID_NS | NVME_SC_DNR;
switch (cmd->common.opcode) {
case nvme_cmd_read:
case nvme_cmd_write:
- req->execute = nvmet_execute_rw;
+ req->execute = nvmet_bdev_execute_rw;
req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
- req->execute = nvmet_execute_flush;
+ req->execute = nvmet_bdev_execute_flush;
req->data_len = 0;
return 0;
case nvme_cmd_dsm:
- req->execute = nvmet_execute_dsm;
+ req->execute = nvmet_bdev_execute_dsm;
req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
- req->execute = nvmet_execute_write_zeroes;
+ req->execute = nvmet_bdev_execute_write_zeroes;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
new file mode 100644
index 000000000000..8c42b3a8c420
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target File I/O commands implementation.
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/file.h>
+#include "nvmet.h"
+
+#define NVMET_MAX_MPOOL_BVEC 16
+#define NVMET_MIN_MPOOL_OBJ 16
+
+void nvmet_file_ns_disable(struct nvmet_ns *ns)
+{
+ if (ns->file) {
+ mempool_destroy(ns->bvec_pool);
+ ns->bvec_pool = NULL;
+ kmem_cache_destroy(ns->bvec_cache);
+ ns->bvec_cache = NULL;
+ fput(ns->file);
+ ns->file = NULL;
+ }
+}
+
+int nvmet_file_ns_enable(struct nvmet_ns *ns)
+{
+ int ret;
+ struct kstat stat;
+
+ ns->file = filp_open(ns->device_path,
+ O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+ if (IS_ERR(ns->file)) {
+ pr_err("failed to open file %s: (%ld)\n",
+ ns->device_path, PTR_ERR(ns->file));
+ return PTR_ERR(ns->file);
+ }
+
+ ret = vfs_getattr(&ns->file->f_path,
+ &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+ if (ret)
+ goto err;
+
+ ns->size = stat.size;
+ ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+
+ ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+ NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ns->bvec_cache) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+ mempool_free_slab, ns->bvec_cache);
+
+ if (!ns->bvec_pool) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ return ret;
+err:
+ ns->size = 0;
+ ns->blksize_shift = 0;
+ nvmet_file_ns_disable(ns);
+ return ret;
+}
+
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+{
+ bv->bv_page = sg_page_iter_page(iter);
+ bv->bv_offset = iter->sg->offset;
+ bv->bv_len = PAGE_SIZE - iter->sg->offset;
+}
+
+static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
+ unsigned long nr_segs, size_t count)
+{
+ struct kiocb *iocb = &req->f.iocb;
+ ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
+ struct iov_iter iter;
+ int ki_flags = 0, rw;
+ ssize_t ret;
+
+ if (req->cmd->rw.opcode == nvme_cmd_write) {
+ if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+ ki_flags = IOCB_DSYNC;
+ call_iter = req->ns->file->f_op->write_iter;
+ rw = WRITE;
+ } else {
+ call_iter = req->ns->file->f_op->read_iter;
+ rw = READ;
+ }
+
+ iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+
+ iocb->ki_pos = pos;
+ iocb->ki_filp = req->ns->file;
+ iocb->ki_flags = IOCB_DIRECT | ki_flags;
+
+ ret = call_iter(iocb, &iter);
+
+ if (ret != -EIOCBQUEUED && iocb->ki_complete)
+ iocb->ki_complete(iocb, ret, 0);
+
+ return ret;
+}
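nvmet_file_submit_bvec() feeds the backing file's ->read_iter/->write_iter with an ITER_BVEC iterator, forces direct I/O, and upgrades NVMe FUA writes to IOCB_DSYNC; when the call completes inline (anything other than -EIOCBQUEUED) it invokes ki_complete by hand so the synchronous and asynchronous paths converge on the same completion. The userspace sketch below shows only the "several segments in, one vectored call out" idea using pwritev(); the path and sizes are made up and O_DIRECT alignment handling is deliberately omitted.

/* Userspace sketch of vectored file I/O, roughly analogous to the
 * bvec-based submission above: gather several buffers and write them
 * with a single pwritev() call.  Path and sizes are illustrative.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char a[4096], b[4096];
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = sizeof(b) },
	};
	int fd = open("/tmp/nvmet-file-demo.img", O_RDWR | O_CREAT, 0600);
	ssize_t ret;

	if (fd < 0)
		return 1;
	memset(a, 'a', sizeof(a));
	memset(b, 'b', sizeof(b));

	/* Write both segments at byte offset 0 in one submission. */
	ret = pwritev(fd, iov, 2, 0);
	printf("wrote %zd bytes\n", ret);
	close(fd);
	return 0;
}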
+
+static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+{
+ struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+
+ if (req->f.bvec != req->inline_bvec) {
+ if (likely(req->f.mpool_alloc == false))
+ kfree(req->f.bvec);
+ else
+ mempool_free(req->f.bvec, req->ns->bvec_pool);
+ }
+
+ nvmet_req_complete(req, ret != req->data_len ?
+ NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+ ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+ struct sg_page_iter sg_pg_iter;
+ unsigned long bv_cnt = 0;
+ bool is_sync = false;
+ size_t len = 0, total_len = 0;
+ ssize_t ret = 0;
+ loff_t pos;
+
+ if (!req->sg_cnt || !nr_bvec) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+
+ if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+ req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ else
+ req->f.bvec = req->inline_bvec;
+
+ req->f.mpool_alloc = false;
+ if (unlikely(!req->f.bvec)) {
+ /* fallback under memory pressure */
+ req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+ req->f.mpool_alloc = true;
+ if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
+ is_sync = true;
+ }
+
+ pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+
+ memset(&req->f.iocb, 0, sizeof(struct kiocb));
+ for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
+ nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+ len += req->f.bvec[bv_cnt].bv_len;
+ total_len += req->f.bvec[bv_cnt].bv_len;
+ bv_cnt++;
+
+ WARN_ON_ONCE((nr_bvec - 1) < 0);
+
+ if (unlikely(is_sync) &&
+ (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+ ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+ if (ret < 0)
+ goto out;
+ pos += len;
+ bv_cnt = 0;
+ len = 0;
+ }
+ nr_bvec--;
+ }
+
+ if (WARN_ON_ONCE(total_len != req->data_len))
+ ret = -EIO;
+out:
+ if (unlikely(is_sync || ret)) {
+ nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
+ return;
+ }
+ req->f.iocb.ki_complete = nvmet_file_io_done;
+ nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+}
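When kmalloc_array() fails, the request borrows a 16-bio_vec object from the namespace mempool and, if the transfer needs more segments than NVMET_MAX_MPOOL_BVEC, degrades to synchronous submission in 16-segment chunks, so one small guaranteed allocation can still push an arbitrarily large I/O. The following stand-alone sketch reproduces just that chunking arithmetic; the segment count is an invented example.

/* Sketch of the chunked-submission arithmetic used on the mempool
 * fallback path: an I/O of nr_seg PAGE_SIZE segments is pushed in
 * batches of at most MAX_CHUNK segments.  Numbers are illustrative.
 */
#include <stdio.h>

#define PAGE_SIZE	4096
#define MAX_CHUNK	16	/* NVMET_MAX_MPOOL_BVEC in the driver */

static void submit(long long pos, int segs)
{
	printf("submit %d segs (%d bytes) at offset %lld\n",
	       segs, segs * PAGE_SIZE, pos);
}

int main(void)
{
	int nr_seg = 37;		/* e.g. a 148 KiB transfer */
	long long pos = 0;
	int batch = 0;

	for (int i = 0; i < nr_seg; i++) {
		batch++;
		/* flush when the batch is full or this was the last segment */
		if (batch == MAX_CHUNK || i == nr_seg - 1) {
			submit(pos, batch);
			pos += (long long)batch * PAGE_SIZE;
			batch = 0;
		}
	}
	return 0;
}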
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+ int ret;
+
+ ret = vfs_fsync(req->ns->file, 1);
+
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_flush(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_flush_work);
+ schedule_work(&req->f.work);
+}
+
+static void nvmet_file_execute_discard(struct nvmet_req *req)
+{
+ int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+ struct nvme_dsm_range range;
+ loff_t offset;
+ loff_t len;
+ int i, ret;
+
+ for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+ if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+ sizeof(range))) {
+ ret = -EIO;
+ break;
+ }
+ offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+ len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ if (ret)
+ break;
+ }
+
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_dsm_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+ switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+ case NVME_DSMGMT_AD:
+ nvmet_file_execute_discard(req);
+ return;
+ case NVME_DSMGMT_IDR:
+ case NVME_DSMGMT_IDW:
+ default:
+ /* Not supported yet */
+ nvmet_req_complete(req, 0);
+ return;
+ }
+}
+
+static void nvmet_file_execute_dsm(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+ schedule_work(&req->f.work);
+}
+
+static void nvmet_file_write_zeroes_work(struct work_struct *w)
+{
+ struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+ struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+ int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+ loff_t offset;
+ loff_t len;
+ int ret;
+
+ offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
+ len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+ req->ns->blksize_shift);
+
+ ret = vfs_fallocate(req->ns->file, mode, offset, len);
+ nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+{
+ INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+ schedule_work(&req->f.work);
+}
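Both file-backed Deallocate (discard) and Write Zeroes reduce to vfs_fallocate() over a byte range: offset = slba << blksize_shift, with the length being nlb << blksize_shift for a DSM range but (length + 1) << blksize_shift for Write Zeroes, whose length field is zero-based. A userspace equivalent with made-up LBAs and a scratch file could look like the sketch below (discard punches a hole; Write Zeroes would use FALLOC_FL_ZERO_RANGE instead).

/* Userspace analogue of the fallocate()-based Discard handling above:
 * turn an LBA range into a byte range and punch it.  LBA values,
 * block size and the file path are illustrative only.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned int blksize_shift = 9;		/* 512-byte logical blocks */
	unsigned long long slba = 2048;		/* starting LBA */
	unsigned long long nlb = 256;		/* number of blocks to discard */
	off_t offset = (off_t)(slba << blksize_shift);
	off_t len = (off_t)(nlb << blksize_shift);
	int fd = open("/tmp/nvmet-ns.img", O_RDWR | O_CREAT, 0600);
	int ret;

	if (fd < 0)
		return 1;
	if (ftruncate(fd, offset + len) == 0) {	/* make sure the range exists */
		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				offset, len);
		printf("fallocate: %d\n", ret);
	}
	close(fd);
	return 0;
}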
+
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+{
+ struct nvme_command *cmd = req->cmd;
+
+ switch (cmd->common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ req->execute = nvmet_file_execute_rw;
+ req->data_len = nvmet_rw_len(req);
+ return 0;
+ case nvme_cmd_flush:
+ req->execute = nvmet_file_execute_flush;
+ req->data_len = 0;
+ return 0;
+ case nvme_cmd_dsm:
+ req->execute = nvmet_file_execute_dsm;
+ req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+ sizeof(struct nvme_dsm_range);
+ return 0;
+ case nvme_cmd_write_zeroes:
+ req->execute = nvmet_file_execute_write_zeroes;
+ req->data_len = 0;
+ return 0;
+ default:
+ pr_err("unhandled cmd for file ns %d on qid %d\n",
+ cmd->common.opcode, req->sq->qid);
+ return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ }
+}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 31fdfba556a8..1304ec3a7ede 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -45,6 +45,7 @@ struct nvme_loop_ctrl {
struct nvme_ctrl ctrl;
struct nvmet_ctrl *target_ctrl;
+ struct nvmet_port *port;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -63,7 +64,8 @@ struct nvme_loop_queue {
unsigned long flags;
};
-static struct nvmet_port *nvmet_loop_port;
+static LIST_HEAD(nvme_loop_ports);
+static DEFINE_MUTEX(nvme_loop_ports_mutex);
static LIST_HEAD(nvme_loop_ctrl_list);
static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
@@ -146,7 +148,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
/* fail with DNR on admin cmd timeout */
nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
- return BLK_EH_HANDLED;
+ return BLK_EH_DONE;
}
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -169,12 +171,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
- iod->req.port = nvmet_loop_port;
+ iod->req.port = queue->ctrl->port;
if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
&queue->nvme_sq, &nvme_loop_ops))
return BLK_STS_OK;
- if (blk_rq_payload_bytes(req)) {
+ if (blk_rq_nr_phys_segments(req)) {
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
@@ -469,6 +471,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
nvme_stop_ctrl(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
+ if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+ /* state change failure should never happen */
+ WARN_ON_ONCE(1);
+ return;
+ }
+
ret = nvme_loop_configure_admin_queue(ctrl);
if (ret)
goto out_disable;
@@ -511,6 +519,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_delete_ctrl_host,
+ .get_address = nvmf_get_address,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -559,6 +568,23 @@ out_destroy_queues:
return ret;
}
+static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl)
+{
+ struct nvmet_port *p, *found = NULL;
+
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_for_each_entry(p, &nvme_loop_ports, entry) {
+ /* if no transport address is specified use the first port */
+ if ((ctrl->opts->mask & NVMF_OPT_TRADDR) &&
+ strcmp(ctrl->opts->traddr, p->disc_addr.traddr))
+ continue;
+ found = p;
+ break;
+ }
+ mutex_unlock(&nvme_loop_ports_mutex);
+ return found;
+}
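Port selection for the loop transport follows one rule: when the user supplied traddr= (NVMF_OPT_TRADDR set in opts->mask) only a port with a matching discovery traddr is used, otherwise the first registered port wins. The small sketch below restates that rule with invented types.

/* Sketch of the loop-port selection rule: honour an explicit traddr
 * when one was given, otherwise fall back to the first port.  The
 * port table and option struct are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct port { const char *traddr; };
struct opts { bool have_traddr; const char *traddr; };

static const struct port *find_port(const struct port *ports, int n,
				    const struct opts *opts)
{
	for (int i = 0; i < n; i++) {
		if (opts->have_traddr &&
		    strcmp(opts->traddr, ports[i].traddr) != 0)
			continue;
		return &ports[i];	/* explicit match, or first port */
	}
	return NULL;
}

int main(void)
{
	const struct port ports[] = { { "lpt0" }, { "lpt1" } };
	const struct opts opts = { .have_traddr = true, .traddr = "lpt1" };
	const struct port *p = find_port(ports, 2, &opts);

	printf("selected %s\n", p ? p->traddr : "(none)");
	return 0;
}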
+
static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
@@ -583,6 +609,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
+ ctrl->port = nvme_loop_find_port(&ctrl->ctrl);
ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
GFP_KERNEL);
@@ -640,27 +667,17 @@ out_put_ctrl:
static int nvme_loop_add_port(struct nvmet_port *port)
{
- /*
- * XXX: disalow adding more than one port so
- * there is no connection rejections when a
- * a subsystem is assigned to a port for which
- * loop doesn't have a pointer.
- * This scenario would be possible if we allowed
- * more than one port to be added and a subsystem
- * was assigned to a port other than nvmet_loop_port.
- */
-
- if (nvmet_loop_port)
- return -EPERM;
-
- nvmet_loop_port = port;
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_add_tail(&port->entry, &nvme_loop_ports);
+ mutex_unlock(&nvme_loop_ports_mutex);
return 0;
}
static void nvme_loop_remove_port(struct nvmet_port *port)
{
- if (port == nvmet_loop_port)
- nvmet_loop_port = NULL;
+ mutex_lock(&nvme_loop_ports_mutex);
+ list_del_init(&port->entry);
+ mutex_unlock(&nvme_loop_ports_mutex);
}
static const struct nvmet_fabrics_ops nvme_loop_ops = {
@@ -676,6 +693,7 @@ static struct nvmf_transport_ops nvme_loop_transport = {
.name = "loop",
.module = THIS_MODULE,
.create_ctrl = nvme_loop_create_ctrl,
+ .allowed_opts = NVMF_OPT_TRADDR,
};
static int __init nvme_loop_init_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84ab21f8..480dfe10fad9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,6 +30,21 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
+
+/*
+ * Supported optional AENs:
+ */
+#define NVMET_AEN_CFG_OPTIONAL \
+ NVME_AEN_CFG_NS_ATTR
+
+/*
+ * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
+ */
+#define NVMET_AEN_CFG_ALL \
+ (NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \
+ NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \
+ NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL)
+
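The two masks partition the Asynchronous Event Configuration bits into what the target can actually generate (the namespace-attribute notice) and what a host may legally enable (that bit plus the mandatory SMART/health warning bits, which this target accepts but never raises). Assuming the usual spec bit positions (namespace attribute = bit 8, SMART critical warnings = bits 0-4; check include/linux/nvme.h for the authoritative values), the composition works out as in this illustration:

/* Illustration of how the AEN configuration masks compose.  Bit
 * positions here are assumptions taken from the NVMe specification,
 * not from this diff.
 */
#include <stdio.h>

#define SMART_CRIT_SPARE	(1u << 0)
#define SMART_CRIT_TEMPERATURE	(1u << 1)
#define SMART_CRIT_RELIABILITY	(1u << 2)
#define SMART_CRIT_MEDIA	(1u << 3)
#define SMART_CRIT_VOLATILE_MEM	(1u << 4)
#define AEN_CFG_NS_ATTR		(1u << 8)

int main(void)
{
	unsigned int optional = AEN_CFG_NS_ATTR;
	unsigned int all = SMART_CRIT_SPARE | SMART_CRIT_TEMPERATURE |
			   SMART_CRIT_RELIABILITY | SMART_CRIT_MEDIA |
			   SMART_CRIT_VOLATILE_MEM | optional;

	printf("OPTIONAL=0x%03x ALL=0x%03x\n", optional, all);
	return 0;
}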
/* Helper Macros when NVMe error is NVME_SC_CONNECT_INVALID_PARAM
* The 16 bit shift is to set IATTR bit to 1, which means offending
* offset starts in the data section of connect()
@@ -43,6 +58,7 @@ struct nvmet_ns {
struct list_head dev_link;
struct percpu_ref ref;
struct block_device *bdev;
+ struct file *file;
u32 nsid;
u32 blksize_shift;
loff_t size;
@@ -57,6 +73,8 @@ struct nvmet_ns {
struct config_group group;
struct completion disable_done;
+ mempool_t *bvec_pool;
+ struct kmem_cache *bvec_cache;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -82,7 +100,7 @@ struct nvmet_sq {
/**
* struct nvmet_port - Common structure to keep port
* information for the target.
- * @entry: List head for holding a list of these elements.
+ * @entry: Entry into referrals or transport list.
* @disc_addr: Address information is stored in a format defined
* for a discovery log page entry.
* @group: ConfigFS group for this element's folder.
@@ -120,6 +138,8 @@ struct nvmet_ctrl {
u16 cntlid;
u32 kato;
+ u32 aen_enabled;
+ unsigned long aen_masked;
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
unsigned int nr_async_event_cmds;
struct list_head async_events;
@@ -132,6 +152,9 @@ struct nvmet_ctrl {
const struct nvmet_fabrics_ops *ops;
+ __le32 *changed_ns_list;
+ u32 nr_changed_ns;
+
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
};
@@ -222,8 +245,18 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
- struct bio inline_bio;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
+ union {
+ struct {
+ struct bio inline_bio;
+ } b;
+ struct {
+ bool mpool_alloc;
+ struct kiocb iocb;
+ struct bio_vec *bvec;
+ struct work_struct work;
+ } f;
+ };
int sg_cnt;
/* data length as parsed from the command: */
size_t data_len;
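Since a request is served by exactly one backend, the block-device state (the inline bio) and the file state (kiocb, bvec pointer, work item) can share storage in an anonymous union, selected implicitly by whether ns->file is set, so struct nvmet_req does not grow for the common bdev case. A minimal userspace rendering of the same pattern, with stand-in types:

/* Minimal sketch of the per-backend union pattern used in struct
 * nvmet_req: one request carries either block-device or file state,
 * never both.  The types here are stand-ins invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct bdev_state { int inline_bio; };
struct file_state { bool mpool_alloc; int iocb; };

struct req {
	bool backed_by_file;	/* selects which union member is live */
	union {
		struct bdev_state b;
		struct file_state f;
	};
};

int main(void)
{
	struct req r = { .backed_by_file = true };

	if (r.backed_by_file)
		r.f.mpool_alloc = false;	/* file path touches only .f */
	else
		r.b.inline_bio = 0;		/* bdev path touches only .b */

	printf("request size: %zu bytes\n", sizeof(r));
	return 0;
}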
@@ -263,7 +296,8 @@ struct nvmet_async_event {
};
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
-u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -316,6 +350,7 @@ u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len);
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
size_t len);
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
@@ -338,4 +373,14 @@ extern struct rw_semaphore nvmet_config_sem;
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
+int nvmet_file_ns_enable(struct nvmet_ns *ns);
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
+void nvmet_file_ns_disable(struct nvmet_ns *ns);
+
+static inline u32 nvmet_rw_len(struct nvmet_req *req)
+{
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
+ req->ns->blksize_shift;
+}
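nvmet_rw_len() converts the command's zero-based block count into bytes: (length + 1) << blksize_shift, so length = 7 with 512-byte blocks (shift 9) is 8 * 512 = 4096 bytes. A two-line check of that arithmetic:

/* Worked example of the zero-based NVMe length field handled by
 * nvmet_rw_len(): length = 7, 512-byte blocks -> 4096 bytes.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t length = 7;		/* 0's based: means 8 blocks */
	uint32_t blksize_shift = 9;	/* 512-byte logical blocks */
	uint32_t bytes = ((uint32_t)length + 1) << blksize_shift;

	printf("%u bytes\n", bytes);	/* prints 4096 */
	return 0;
}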
#endif /* _NVMET_H */