diff options
| author | Jens Axboe <axboe@kernel.dk> | 2026-06-05 14:18:58 +0300 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-06-05 14:18:58 +0300 |
| commit | ed60c09f292f1383bbcf79dcf61b6257bbb3a503 (patch) | |
| tree | feb17db78c3ee8d8680e75b24eefd56530b0332f | |
| parent | 3f1eccd37282de91efd0575ee8e212af4bde39b1 (diff) | |
| parent | 3c8c284dfcdfce81a02fe3c911196d9876468ae4 (diff) | |
| download | linux-ed60c09f292f1383bbcf79dcf61b6257bbb3a503.tar.xz | |
Merge tag 'nvme-7.2-2026-06-04' of git://git.infradead.org/nvme into for-7.2/block
Pull NVMe updates from Keith:
"- Per-controller timeouts
- Multipath telemetry
- Namespace format validation
- Various other fixes"
* tag 'nvme-7.2-2026-06-04' of git://git.infradead.org/nvme: (34 commits)
nvme: export controller reconnect event count via sysfs
nvme: export controller reset event count via sysfs
nvme: export I/O failure count when no path is available via sysfs
nvme: export I/O requeue count when no path is usable via sysfs
nvme: export command error counters via sysfs
nvme: export multipath failover count via sysfs
nvme: export command retry count via sysfs
nvme: add diag attribute group under sysfs
nvme-tcp: lockdep: use dynamic lockdep keys per socket instance
nvme-tcp: move nvme_tcp_reclassify_socket()
nvme: validate FDP configuration descriptor sizes
nvmet-auth: validate reply message payload bounds against transfer length
nvme: refresh multipath head zoned limits from path limits
nvme: fix FDP fdpcidx bounds check
nvme-tcp: Use WQ_PERCPU explicitly if wq_unbound is false.
nvmet: fix pre-auth out-of-bounds heap read in Discovery Get Log Page
nvme-multipath: set BIO_REMAPPED on bios remapped to per-path namespace disks
nvme-multipath: require exact iopolicy names for module parameter
nvme-multipath: pass NS head to nvme_mpath_revalidate_paths()
nvme-pci: fix out-of-bounds access in nvme_setup_descriptor_pools
...
| -rw-r--r-- | drivers/nvme/host/apple.c | 2 | ||||
| -rw-r--r-- | drivers/nvme/host/core.c | 74 | ||||
| -rw-r--r-- | drivers/nvme/host/fc.c | 3 | ||||
| -rw-r--r-- | drivers/nvme/host/multipath.c | 137 | ||||
| -rw-r--r-- | drivers/nvme/host/nvme.h | 21 | ||||
| -rw-r--r-- | drivers/nvme/host/pci.c | 14 | ||||
| -rw-r--r-- | drivers/nvme/host/rdma.c | 4 | ||||
| -rw-r--r-- | drivers/nvme/host/sysfs.c | 311 | ||||
| -rw-r--r-- | drivers/nvme/host/tcp.c | 98 | ||||
| -rw-r--r-- | drivers/nvme/target/discovery.c | 23 | ||||
| -rw-r--r-- | drivers/nvme/target/fabrics-cmd-auth.c | 15 | ||||
| -rw-r--r-- | drivers/nvme/target/loop.c | 31 | ||||
| -rw-r--r-- | drivers/nvme/target/rdma.c | 6 | ||||
| -rw-r--r-- | drivers/nvme/target/tcp.c | 11 |
14 files changed, 631 insertions, 119 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 7fc6b9eacf2e..de0d5126458f 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -858,7 +858,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) * doing a safe shutdown. */ if (!dead && shutdown && freeze) - nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT); + nvme_wait_freeze_timeout(&anv->ctrl); nvme_quiesce_io_queues(&anv->ctrl); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc388e24caad..efaddab8296e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -323,6 +323,7 @@ static void nvme_retry_req(struct request *req) { unsigned long delay = 0; u16 crd; + struct nvme_ns *ns = req->q->queuedata; /* The mask and shift result must be <= 3 */ crd = (nvme_req(req)->status & NVME_STATUS_CRD) >> 11; @@ -330,6 +331,9 @@ static void nvme_retry_req(struct request *req) delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100; nvme_req(req)->retries++; + if (ns) + atomic_long_inc(&ns->retries); + blk_mq_requeue_request(req, false); blk_mq_delay_kick_requeue_list(req->q, delay); } @@ -434,11 +438,19 @@ static inline void nvme_end_req_zoned(struct request *req) static inline void __nvme_end_req(struct request *req) { - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + if (unlikely(nr->status && !(req->rq_flags & RQF_QUIET))) { if (blk_rq_is_passthrough(req)) nvme_log_err_passthru(req); else nvme_log_error(req); + + if (ns) + atomic_long_inc(&ns->errors); + else + atomic_long_inc(&nr->ctrl->errors); } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); @@ -584,6 +596,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_NEW: case NVME_CTRL_LIVE: changed = true; + atomic_long_inc(&ctrl->nr_reset); fallthrough; default: break; @@ -729,10 +742,8 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) struct nvme_ns *ns = req->q->disk->private_data; logging_enabled = ns->head->passthru_err_log_enabled; - req->timeout = NVME_IO_TIMEOUT; } else { /* no queuedata implies admin queue */ logging_enabled = nr->ctrl->passthru_err_log_enabled; - req->timeout = NVME_ADMIN_TIMEOUT; } if (!logging_enabled) @@ -2263,7 +2274,7 @@ static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl, } n = le16_to_cpu(h->numfdpc) + 1; - if (fdp_idx > n) { + if (fdp_idx >= n) { dev_warn(ctrl->device, "FDP index:%d out of range:%d\n", fdp_idx, n); /* Proceed without registering FDP streams */ @@ -2275,14 +2286,16 @@ static int nvme_query_fdp_granularity(struct nvme_ctrl *ctrl, desc = log; end = log + size - sizeof(*h); for (i = 0; i < fdp_idx; i++) { - log += le16_to_cpu(desc->dsze); - desc = log; - if (log >= end) { + u16 dsze = le16_to_cpu(desc->dsze); + + if (!dsze || log + dsze > end) { dev_warn(ctrl->device, - "FDP invalid config descriptor list\n"); + "FDP invalid config descriptor at index %d\n", i); ret = 0; goto out; } + log += dsze; + desc = log; } if (le32_to_cpu(desc->nrg) > 1) { @@ -2409,12 +2422,22 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, goto out; } + if (id->lbaf[lbaf].ds < SECTOR_SHIFT || + check_shl_overflow(le64_to_cpu(id->nsze), + id->lbaf[lbaf].ds - SECTOR_SHIFT, + &capacity)) { + dev_warn_once(ns->ctrl->device, + "invalid LBA data size %u, skipping namespace\n", + id->lbaf[lbaf].ds); + ret = -ENODEV; + goto out; + } + lim = queue_limits_start_update(ns->disk->queue); memflags = blk_mq_freeze_queue(ns->disk->queue); ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->nuse = le64_to_cpu(id->nuse); - capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze)); nvme_set_ctrl_limits(ns->ctrl, &lim, false); nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); @@ -2483,6 +2506,14 @@ out: return ret; } +static void nvme_stack_zone_resources(struct queue_limits *t, + const struct queue_limits *b) +{ + t->max_open_zones = min_not_zero(t->max_open_zones, b->max_open_zones); + t->max_active_zones = + min_not_zero(t->max_active_zones, b->max_active_zones); +} + static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) { bool unsupported = false; @@ -2549,6 +2580,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) lim.io_opt = ns_lim->io_opt; queue_limits_stack_bdev(&lim, ns->disk->part0, 0, ns->head->disk->disk_name); + if (lim.features & BLK_FEAT_ZONED) + nvme_stack_zone_resources(&lim, ns_lim); if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else @@ -2559,7 +2592,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); + nvme_mpath_revalidate_paths(ns->head); blk_mq_unfreeze_queue(ns->head->disk->queue, memflags); } @@ -3922,7 +3955,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, int ret = -ENOMEM; #ifdef CONFIG_NVME_MULTIPATH - size += num_possible_nodes() * sizeof(struct nvme_ns *); + size += nr_node_ids * sizeof(struct nvme_ns *); #endif head = kzalloc(size, GFP_KERNEL); @@ -4205,6 +4238,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) mutex_unlock(&ctrl->namespaces_lock); goto out_unlink_ns; } + blk_queue_rq_timeout(ns->queue, ctrl->io_timeout); nvme_ns_add_to_ctrl_list(ns); mutex_unlock(&ctrl->namespaces_lock); synchronize_srcu(&ctrl->srcu); @@ -4890,12 +4924,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, if (ret) return ret; - /* - * If a previous admin queue exists (e.g., from before a reset), - * put it now before allocating a new one to avoid orphaning it. - */ - if (ctrl->admin_q) - blk_put_queue(ctrl->admin_q); + WARN_ON_ONCE(ctrl->admin_q); ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); if (IS_ERR(ctrl->admin_q)) { @@ -4933,10 +4962,8 @@ void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl) */ nvme_stop_keep_alive(ctrl); blk_mq_destroy_queue(ctrl->admin_q); - if (ctrl->ops->flags & NVME_F_FABRICS) { + if (ctrl->fabrics_q) blk_mq_destroy_queue(ctrl->fabrics_q); - blk_put_queue(ctrl->fabrics_q); - } blk_mq_free_tag_set(ctrl->admin_tagset); } EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); @@ -5078,6 +5105,8 @@ static void nvme_free_ctrl(struct device *dev) if (ctrl->admin_q) blk_put_queue(ctrl->admin_q); + if (ctrl->fabrics_q) + blk_put_queue(ctrl->fabrics_q); if (!subsys || ctrl->instance != subsys->instance) ida_free(&nvme_instance_ida, ctrl->instance); nvme_free_cels(ctrl); @@ -5142,6 +5171,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; ctrl->ka_last_check_time = jiffies; + ctrl->admin_timeout = NVME_ADMIN_TIMEOUT; + ctrl->io_timeout = NVME_IO_TIMEOUT; BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) > PAGE_SIZE); @@ -5248,8 +5279,9 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_unfreeze); -int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl) { + long timeout = ctrl->io_timeout; struct nvme_ns *ns; int srcu_idx; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1907da499ad2..2c9a6d3c9797 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3148,6 +3148,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_term_aen_ops; } + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects); ctrl->ctrl.nr_reconnects = 0; nvme_start_ctrl(&ctrl->ctrl); @@ -3470,6 +3472,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; + atomic_long_set(&ctrl->ctrl.acc_reconnects, 0); INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bca8e7c97519..e033ede953cc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -73,19 +73,29 @@ static const char *nvme_iopolicy_names[] = { static int iopolicy = NVME_IOPOLICY_NUMA; +static int nvme_iopolicy_parse(const char *str) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) { + if (sysfs_streq(str, nvme_iopolicy_names[i])) + return i; + } + return -EINVAL; +} + static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp) { + int policy; + if (!val) return -EINVAL; - if (!strncmp(val, "numa", 4)) - iopolicy = NVME_IOPOLICY_NUMA; - else if (!strncmp(val, "round-robin", 11)) - iopolicy = NVME_IOPOLICY_RR; - else if (!strncmp(val, "queue-depth", 11)) - iopolicy = NVME_IOPOLICY_QD; - else - return -EINVAL; + policy = nvme_iopolicy_parse(val); + if (policy < 0) + return policy; + + iopolicy = policy; return 0; } @@ -142,6 +152,7 @@ void nvme_failover_req(struct request *req) struct bio *bio; nvme_mpath_clear_current_path(ns); + atomic_long_inc(&ns->failover); /* * If we got back an ANA error, we know the controller is alive but not @@ -257,10 +268,10 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) srcu_read_unlock(&ctrl->srcu, srcu_idx); } -void nvme_mpath_revalidate_paths(struct nvme_ns *ns) +void nvme_mpath_revalidate_paths(struct nvme_ns_head *head) { - struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); + struct nvme_ns *ns; int node; int srcu_idx; @@ -514,6 +525,12 @@ static void nvme_ns_head_submit_bio(struct bio *bio) ns = nvme_find_path(head); if (likely(ns)) { bio_set_dev(bio, ns->disk->part0); + /* + * Use BIO_REMAPPED to skip bio_check_eod() when this bio + * enters submit_bio_noacct() for the per-path device. The EOD + * check already passed on the multipath head. + */ + bio_set_flag(bio, BIO_REMAPPED); bio->bi_opf |= REQ_NVME_MPATH; trace_block_bio_remap(bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); @@ -524,10 +541,12 @@ static void nvme_ns_head_submit_bio(struct bio *bio) spin_lock_irq(&head->requeue_lock); bio_list_add(&head->requeue_list, bio); spin_unlock_irq(&head->requeue_lock); + atomic_long_inc(&head->io_requeue_no_usable_path_count); } else { dev_warn_ratelimited(dev, "no available path - failing I/O\n"); bio_io_error(bio); + atomic_long_inc(&head->io_fail_no_available_path_count); } srcu_read_unlock(&head->srcu, srcu_idx); @@ -1042,16 +1061,14 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev, { struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - int i; + int policy; - for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) { - if (sysfs_streq(buf, nvme_iopolicy_names[i])) { - nvme_subsys_iopolicy_update(subsys, i); - return count; - } - } + policy = nvme_iopolicy_parse(buf); + if (policy < 0) + return policy; - return -EINVAL; + nvme_subsys_iopolicy_update(subsys, policy); + return count; } SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR, nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store); @@ -1154,6 +1171,90 @@ static ssize_t delayed_removal_secs_store(struct device *dev, DEVICE_ATTR_RW(delayed_removal_secs); +static ssize_t multipath_failover_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ns->failover)); +} + +static ssize_t multipath_failover_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long failover; + int ret; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + ret = kstrtoul(buf, 0, &failover); + if (ret) + return -EINVAL; + + atomic_long_set(&ns->failover, failover); + + return count; +} + +DEVICE_ATTR_RW(multipath_failover_count); + +static ssize_t io_requeue_no_usable_path_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + + return sysfs_emit(buf, "%lu\n", + atomic_long_read(&head->io_requeue_no_usable_path_count)); +} + +static ssize_t io_requeue_no_usable_path_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long requeue_cnt; + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + + err = kstrtoul(buf, 0, &requeue_cnt); + if (err) + return -EINVAL; + + atomic_long_set(&head->io_requeue_no_usable_path_count, requeue_cnt); + + return count; +} + +DEVICE_ATTR_RW(io_requeue_no_usable_path_count); + +static ssize_t io_fail_no_available_path_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + + return sysfs_emit(buf, "%lu\n", + atomic_long_read(&head->io_fail_no_available_path_count)); +} + +static ssize_t io_fail_no_available_path_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long fail_cnt; + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns_head *head = disk->private_data; + + err = kstrtoul(buf, 0, &fail_cnt); + if (err) + return -EINVAL; + + atomic_long_set(&head->io_fail_no_available_path_count, fail_cnt); + + return count; +} + +DEVICE_ATTR_RW(io_fail_no_available_path_count); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ccd5e05dac98..b367c67dcb37 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -370,6 +370,8 @@ struct nvme_ctrl { u16 mtfa; u32 ctrl_config; u32 queue_count; + u32 admin_timeout; + u32 io_timeout; u64 cap; u32 max_hw_sectors; @@ -413,6 +415,8 @@ struct nvme_ctrl { unsigned long ka_last_check_time; struct work_struct fw_act_work; unsigned long events; + atomic_long_t errors; + atomic_long_t nr_reset; #ifdef CONFIG_NVME_MULTIPATH /* asymmetric namespace access: */ @@ -454,6 +458,8 @@ struct nvme_ctrl { u16 icdoff; u16 maxcmd; int nr_reconnects; + /* accumulate reconenct attempts, as nr_reconnects can reset to zero */ + atomic_long_t acc_reconnects; unsigned long flags; struct nvmf_ctrl_options *opts; @@ -563,6 +569,8 @@ struct nvme_ns_head { unsigned long flags; struct delayed_work remove_work; unsigned int delayed_removal_secs; + atomic_long_t io_requeue_no_usable_path_count; + atomic_long_t io_fail_no_available_path_count; #define NVME_NSHEAD_DISK_LIVE 0 #define NVME_NSHEAD_QUEUE_IF_NO_PATH 1 struct nvme_ns __rcu *current_path[]; @@ -589,7 +597,10 @@ struct nvme_ns { #ifdef CONFIG_NVME_MULTIPATH enum nvme_ana_state ana_state; u32 ana_grpid; + atomic_long_t failover; #endif + atomic_long_t retries; + atomic_long_t errors; struct list_head siblings; struct kref kref; struct nvme_ns_head *head; @@ -900,7 +911,7 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl); void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); -int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); +int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl); void nvme_start_freeze(struct nvme_ctrl *ctrl); static inline enum req_op nvme_req_op(struct nvme_command *cmd) @@ -1012,6 +1023,7 @@ extern const struct attribute_group nvme_ns_mpath_attr_group; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; +extern const struct attribute_group nvme_dev_diag_attrs_group; extern const struct attribute_group *nvme_subsys_attrs_groups[]; extern const struct attribute_group *nvme_dev_attr_groups[]; extern const struct block_device_operations nvme_bdev_ops; @@ -1041,7 +1053,7 @@ void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); -void nvme_mpath_revalidate_paths(struct nvme_ns *ns); +void nvme_mpath_revalidate_paths(struct nvme_ns_head *head); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); void nvme_mpath_remove_disk(struct nvme_ns_head *head); void nvme_mpath_start_request(struct request *rq); @@ -1061,6 +1073,9 @@ extern struct device_attribute dev_attr_ana_state; extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute dev_attr_delayed_removal_secs; +extern struct device_attribute dev_attr_multipath_failover_count; +extern struct device_attribute dev_attr_io_requeue_no_usable_path_count; +extern struct device_attribute dev_attr_io_fail_no_available_path_count; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) @@ -1106,7 +1121,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { return false; } -static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns) +static inline void nvme_mpath_revalidate_paths(struct nvme_ns_head *head) { } static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 24911e1252d5..9b9595a5f331 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -587,11 +587,16 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, } static struct nvme_descriptor_pools * -nvme_setup_descriptor_pools(struct nvme_dev *dev, unsigned numa_node) +nvme_setup_descriptor_pools(struct nvme_dev *dev, int numa_node) { - struct nvme_descriptor_pools *pools = &dev->descriptor_pools[numa_node]; + struct nvme_descriptor_pools *pools; size_t small_align = NVME_SMALL_POOL_SIZE; + if (numa_node == NUMA_NO_NODE) + numa_node = 0; + + pools = &dev->descriptor_pools[numa_node]; + if (pools->small) return pools; /* already initialized */ @@ -2810,6 +2815,7 @@ static const struct attribute_group nvme_pci_dev_attrs_group = { static const struct attribute_group *nvme_pci_dev_attr_groups[] = { &nvme_dev_attrs_group, &nvme_pci_dev_attrs_group, + &nvme_dev_diag_attrs_group, NULL, }; @@ -3094,7 +3100,7 @@ static bool __nvme_delete_io_queues(struct nvme_dev *dev, u8 opcode) unsigned long timeout; retry: - timeout = NVME_ADMIN_TIMEOUT; + timeout = dev->ctrl.admin_timeout; while (nr_queues > 0) { if (nvme_delete_queue(&dev->queues[nr_queues], opcode)) break; @@ -3276,7 +3282,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * if doing a safe shutdown. */ if (!dead && shutdown) - nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + nvme_wait_freeze_timeout(&dev->ctrl); } nvme_quiesce_io_queues(&dev->ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 08459c65c3d5..6909e3542794 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -888,7 +888,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) if (!new) { nvme_start_freeze(&ctrl->ctrl); nvme_unquiesce_io_queues(&ctrl->ctrl); - if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { + if (!nvme_wait_freeze_timeout(&ctrl->ctrl)) { /* * If we timed out waiting for freeze we are likely to * be stuck. Fail the controller initialization just @@ -1110,6 +1110,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", ctrl->ctrl.nr_reconnects); + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects); ctrl->ctrl.nr_reconnects = 0; return; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index e59758616f27..933a5adfb7af 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -6,6 +6,7 @@ */ #include <linux/nvme-auth.h> +#include <linux/blkdev.h> #include "nvme.h" #include "fabrics.h" @@ -335,14 +336,7 @@ static bool multipath_sysfs_group_visible(struct kobject *kobj) return nvme_disk_is_ns_head(dev_to_disk(dev)); } - -static bool multipath_sysfs_attr_visible(struct kobject *kobj, - struct attribute *attr, int n) -{ - return false; -} - -DEFINE_SYSFS_GROUP_VISIBLE(multipath_sysfs) +DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(multipath_sysfs) const struct attribute_group nvme_ns_mpath_attr_group = { .name = "multipath", @@ -351,11 +345,114 @@ const struct attribute_group nvme_ns_mpath_attr_group = { }; #endif +static ssize_t command_retries_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ns->retries)); +} + +static ssize_t command_retries_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long retries; + int err; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + err = kstrtoul(buf, 0, &retries); + if (err) + return -EINVAL; + + atomic_long_set(&ns->retries, retries); + + return count; +} +static DEVICE_ATTR_RW(command_retries_count); + +static ssize_t nvme_io_errors_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ns->errors)); +} + +static ssize_t nvme_io_errors_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long errors; + int err; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + err = kstrtoul(buf, 0, &errors); + if (err) + return -EINVAL; + + atomic_long_set(&ns->errors, errors); + + return count; +} + +struct device_attribute dev_attr_io_errors = + __ATTR(command_error_count, 0644, + nvme_io_errors_show, nvme_io_errors_store); + +static struct attribute *nvme_ns_diag_attrs[] = { + &dev_attr_command_retries_count.attr, + &dev_attr_io_errors.attr, +#ifdef CONFIG_NVME_MULTIPATH + &dev_attr_multipath_failover_count.attr, + &dev_attr_io_requeue_no_usable_path_count.attr, + &dev_attr_io_fail_no_available_path_count.attr, +#endif + NULL, +}; + +static umode_t nvme_ns_diag_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_command_retries_count.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } + if (a == &dev_attr_io_errors.attr) { + struct gendisk *disk = dev_to_disk(dev); + + if (nvme_disk_is_ns_head(disk)) + return 0; + } +#ifdef CONFIG_NVME_MULTIPATH + if (a == &dev_attr_multipath_failover_count.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } + if (a == &dev_attr_io_requeue_no_usable_path_count.attr) { + if (!nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } + if (a == &dev_attr_io_fail_no_available_path_count.attr) { + if (!nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } +#endif + return a->mode; +} + +const struct attribute_group nvme_ns_diag_attr_group = { + .name = "diag", + .attrs = nvme_ns_diag_attrs, + .is_visible = nvme_ns_diag_attrs_are_visible, +}; + const struct attribute_group *nvme_ns_attr_groups[] = { &nvme_ns_attr_group, #ifdef CONFIG_NVME_MULTIPATH &nvme_ns_mpath_attr_group, #endif + &nvme_ns_diag_attr_group, NULL, }; @@ -623,6 +720,92 @@ static ssize_t quirks_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(quirks); +static ssize_t nvme_admin_timeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", + jiffies_to_msecs(ctrl->admin_timeout)); +} + +static ssize_t nvme_admin_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + u32 timeout; + int err; + + /* + * Wait until the controller reaches the LIVE state to be sure that + * admin_q and fabrics_q are properly initialized. + */ + if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) + return -EBUSY; + + err = kstrtou32(buf, 10, &timeout); + if (err || !timeout) + return -EINVAL; + + ctrl->admin_timeout = msecs_to_jiffies(timeout); + + blk_queue_rq_timeout(ctrl->admin_q, ctrl->admin_timeout); + if (ctrl->fabrics_q) + blk_queue_rq_timeout(ctrl->fabrics_q, ctrl->admin_timeout); + + return count; +} + +static DEVICE_ATTR(admin_timeout, S_IRUGO | S_IWUSR, + nvme_admin_timeout_show, nvme_admin_timeout_store); + +static ssize_t nvme_io_timeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", jiffies_to_msecs(ctrl->io_timeout)); +} + +static ssize_t nvme_io_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvme_ns *ns; + u32 timeout; + int err; + + /* + * Wait until the controller reaches the LIVE state to be sure that + * connect_q is properly initialized. + */ + if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) + return -EBUSY; + + err = kstrtou32(buf, 10, &timeout); + if (err || !timeout) + return -EINVAL; + + /* Take the namespaces_lock to avoid racing against nvme_alloc_ns() */ + mutex_lock(&ctrl->namespaces_lock); + + ctrl->io_timeout = msecs_to_jiffies(timeout); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_queue_rq_timeout(ns->queue, ctrl->io_timeout); + + mutex_unlock(&ctrl->namespaces_lock); + + if (ctrl->connect_q) + blk_queue_rq_timeout(ctrl->connect_q, ctrl->io_timeout); + + return count; +} + +static DEVICE_ATTR(io_timeout, S_IRUGO | S_IWUSR, + nvme_io_timeout_show, nvme_io_timeout_store); + #ifdef CONFIG_NVME_HOST_AUTH static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -765,6 +948,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_cntrltype.attr, &dev_attr_dctype.attr, &dev_attr_quirks.attr, + &dev_attr_admin_timeout.attr, + &dev_attr_io_timeout.attr, #ifdef CONFIG_NVME_HOST_AUTH &dev_attr_dhchap_secret.attr, &dev_attr_dhchap_ctrl_secret.attr, @@ -937,11 +1122,121 @@ static const struct attribute_group nvme_tls_attrs_group = { }; #endif +static ssize_t nvme_adm_errors_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", + (unsigned long)atomic_long_read(&ctrl->errors)); +} + +static ssize_t nvme_adm_errors_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long errors; + int err; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + err = kstrtoul(buf, 0, &errors); + if (err) + return -EINVAL; + + atomic_long_set(&ctrl->errors, errors); + + return count; +} + +struct device_attribute dev_attr_adm_errors = + __ATTR(command_error_count, 0644, + nvme_adm_errors_show, nvme_adm_errors_store); + +static ssize_t reset_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ctrl->nr_reset)); +} + +static ssize_t reset_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long reset_cnt; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + err = kstrtoul(buf, 0, &reset_cnt); + if (err) + return -EINVAL; + + atomic_long_set(&ctrl->nr_reset, reset_cnt); + + return count; +} + +static ssize_t reconnect_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", + atomic_long_read(&ctrl->acc_reconnects) + + ctrl->nr_reconnects); +} + +static ssize_t reconnect_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long reconnect_cnt; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + err = kstrtoul(buf, 0, &reconnect_cnt); + if (err) + return -EINVAL; + + atomic_long_set(&ctrl->acc_reconnects, reconnect_cnt); + + return count; +} + +static DEVICE_ATTR_RW(reconnect_count); + +static DEVICE_ATTR_RW(reset_count); + +static struct attribute *nvme_dev_diag_attrs[] = { + &dev_attr_adm_errors.attr, + &dev_attr_reset_count.attr, + &dev_attr_reconnect_count.attr, + NULL, +}; + +static umode_t nvme_dev_diag_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (a == &dev_attr_reconnect_count.attr && !ctrl->opts) + return 0; + + return a->mode; +} + +const struct attribute_group nvme_dev_diag_attrs_group = { + .name = "diag", + .attrs = nvme_dev_diag_attrs, + .is_visible = nvme_dev_diag_attrs_are_visible, +}; +EXPORT_SYMBOL_GPL(nvme_dev_diag_attrs_group); + const struct attribute_group *nvme_dev_attr_groups[] = { &nvme_dev_attrs_group, #ifdef CONFIG_NVME_TCP_TLS &nvme_tls_attrs_group, #endif + &nvme_dev_diag_attrs_group, NULL, }; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9313ab211c67..4b81521723f6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -56,44 +56,6 @@ MODULE_PARM_DESC(tls_handshake_timeout, static atomic_t nvme_tcp_cpu_queues[NR_CPUS]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC -/* lockdep can detect a circular dependency of the form - * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock - * because dependencies are tracked for both nvme-tcp and user contexts. Using - * a separate class prevents lockdep from conflating nvme-tcp socket use with - * user-space socket API use. - */ -static struct lock_class_key nvme_tcp_sk_key[2]; -static struct lock_class_key nvme_tcp_slock_key[2]; - -static void nvme_tcp_reclassify_socket(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) - return; - - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", - &nvme_tcp_slock_key[0], - "sk_lock-AF_INET-NVME", - &nvme_tcp_sk_key[0]); - break; - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", - &nvme_tcp_slock_key[1], - "sk_lock-AF_INET6-NVME", - &nvme_tcp_sk_key[1]); - break; - default: - WARN_ON_ONCE(1); - } -} -#else -static void nvme_tcp_reclassify_socket(struct socket *sock) { } -#endif - enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -180,6 +142,11 @@ struct nvme_tcp_queue { void (*state_change)(struct sock *); void (*data_ready)(struct sock *); void (*write_space)(struct sock *); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key nvme_tcp_sk_key; + struct lock_class_key nvme_tcp_slock_key; +#endif }; struct nvme_tcp_ctrl { @@ -207,6 +174,39 @@ static const struct blk_mq_ops nvme_tcp_mq_ops; static const struct blk_mq_ops nvme_tcp_admin_mq_ops; static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +/* lockdep can detect a circular dependency of the form + * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock + * because dependencies are tracked for both nvme-tcp and user contexts. Using + * a separate class prevents lockdep from conflating nvme-tcp socket use with + * user-space socket API use. + */ +static void nvme_tcp_reclassify_socket(struct nvme_tcp_queue *queue) +{ + struct sock *sk = queue->sock->sk; + + if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) + return; + + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", + &queue->nvme_tcp_slock_key, + "sk_lock-AF_INET-NVME", + &queue->nvme_tcp_sk_key); + break; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", + &queue->nvme_tcp_slock_key, + "sk_lock-AF_INET6-NVME", + &queue->nvme_tcp_sk_key); + break; + default: + WARN_ON_ONCE(1); + } +} +#endif + static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_tcp_ctrl, ctrl); @@ -1461,6 +1461,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) kfree(queue->pdu); mutex_destroy(&queue->send_mutex); mutex_destroy(&queue->queue_lock); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&queue->nvme_tcp_sk_key); + lockdep_unregister_key(&queue->nvme_tcp_slock_key); +#endif } static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) @@ -1806,7 +1811,12 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, } sk_net_refcnt_upgrade(queue->sock->sk); - nvme_tcp_reclassify_socket(queue->sock); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_register_key(&queue->nvme_tcp_sk_key); + lockdep_register_key(&queue->nvme_tcp_slock_key); + nvme_tcp_reclassify_socket(queue); +#endif /* Single syn retry */ tcp_sock_set_syncnt(queue->sock->sk, 1); @@ -1911,6 +1921,10 @@ err_sock: /* Use sync variant - see nvme_tcp_free_queue() for explanation */ __fput_sync(queue->sock->file); queue->sock = NULL; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_unregister_key(&queue->nvme_tcp_sk_key); + lockdep_unregister_key(&queue->nvme_tcp_slock_key); +#endif err_destroy_mutex: mutex_destroy(&queue->send_mutex); mutex_destroy(&queue->queue_lock); @@ -2201,7 +2215,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) if (!new) { nvme_start_freeze(ctrl); nvme_unquiesce_io_queues(ctrl); - if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { + if (!nvme_wait_freeze_timeout(ctrl)) { /* * If we timed out waiting for freeze we are likely to * be stuck. Fail the controller initialization just @@ -2468,6 +2482,8 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", ctrl->nr_reconnects, ctrl->opts->max_reconnects); + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->nr_reconnects, &ctrl->acc_reconnects); ctrl->nr_reconnects = 0; return; @@ -3046,6 +3062,8 @@ static int __init nvme_tcp_init_module(void) if (wq_unbound) wq_flags |= WQ_UNBOUND; + else + wq_flags |= WQ_PERCPU; nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0); if (!nvme_tcp_wq) diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index e9b35549e254..114869d16a1f 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -166,6 +166,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u64 offset = nvmet_get_log_page_offset(req->cmd); size_t data_len = nvmet_get_log_page_len(req->cmd); size_t alloc_len; + size_t copy_len; struct nvmet_subsys_link *p; struct nvmet_port *r; u32 numrec = 0; @@ -242,7 +243,27 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) up_read(&nvmet_config_sem); - status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len); + /* + * Validate the host-supplied log page offset before copying out. + * Without this check, the host controls a 64-bit byte offset into + * a small kzalloc'd buffer: a value past the log page lets the + * subsequent memcpy read adjacent kernel heap, and a value aimed + * at unmapped kernel memory faults the in-kernel copy and crashes + * the target host. The Discovery controller is unauthenticated, + * so the bug is reachable from any reachable fabric peer. + */ + if (offset > alloc_len) { + req->error_loc = + offsetof(struct nvme_get_log_page_command, lpo); + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; + goto out_free_buffer; + } + + copy_len = min_t(size_t, data_len, alloc_len - offset); + status = nvmet_copy_to_sgl(req, 0, buffer + offset, copy_len); + if (!status && copy_len < data_len) + status = nvmet_zero_sgl(req, copy_len, data_len - copy_len); +out_free_buffer: kfree(buffer); out: nvmet_req_complete(req, status); diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index f1e613e7c63e..0a85acf1e5c7 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -132,13 +132,22 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) return 0; } -static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) +static u8 nvmet_auth_reply(struct nvmet_req *req, void *d, u32 tl) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmf_auth_dhchap_reply_data *data = d; - u16 dhvlen = le16_to_cpu(data->dhvlen); + u16 dhvlen; u8 *response; + if (tl < sizeof(*data)) + return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; + + dhvlen = le16_to_cpu(data->dhvlen); + + /* Validate that hl and dhvlen fit within the transfer length */ + if (sizeof(*data) + 2 * (size_t)data->hl + dhvlen > tl) + return NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD; + pr_debug("%s: ctrl %d qid %d: data hl %d cvalid %d dhvlen %u\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, data->cvalid, dhvlen); @@ -338,7 +347,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) switch (data->auth_id) { case NVME_AUTH_DHCHAP_MESSAGE_REPLY: - dhchap_status = nvmet_auth_reply(req, d); + dhchap_status = nvmet_auth_reply(req, d, tl); if (dhchap_status == 0) req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index ae00bcef2251..fcb1f8186fdd 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -274,7 +274,6 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); nvmet_cq_put(&ctrl->queues[0].nvme_cq); - nvme_remove_admin_tag_set(&ctrl->ctrl); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -375,25 +374,18 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.queue_count = 1; - error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, - &nvme_loop_admin_mq_ops, - sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); - if (error) - goto out_free_sq; - /* reset stopped state for the fresh admin queue */ clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->ctrl.flags); error = nvmf_connect_admin_queue(&ctrl->ctrl); if (error) - goto out_cleanup_tagset; + goto out_free_sq; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); error = nvme_enable_ctrl(&ctrl->ctrl); if (error) - goto out_cleanup_tagset; + goto out_free_sq; ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT; @@ -402,14 +394,12 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvme_init_ctrl_finish(&ctrl->ctrl, false); if (error) - goto out_cleanup_tagset; + goto out_free_sq; return 0; -out_cleanup_tagset: - clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); - nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_sq: + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); nvmet_cq_put(&ctrl->queues[0].nvme_cq); return error; @@ -432,6 +422,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl) { nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl)); + nvme_remove_admin_tag_set(ctrl); } static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl) @@ -494,6 +485,7 @@ out_destroy_admin: nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); out_disable: + nvme_remove_admin_tag_set(&ctrl->ctrl); dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); nvme_uninit_ctrl(&ctrl->ctrl); } @@ -594,10 +586,17 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, if (!ctrl->queues) goto out_uninit_ctrl; - ret = nvme_loop_configure_admin_queue(ctrl); + ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_loop_admin_mq_ops, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) goto out_free_queues; + ret = nvme_loop_configure_admin_queue(ctrl); + if (ret) + goto out_remove_admin_tagset; + if (opts->queue_size > ctrl->ctrl.maxcmd) { /* warn if maxcmd is lower than queue_size */ dev_warn(ctrl->ctrl.device, @@ -633,6 +632,8 @@ out_remove_admin_queue: nvme_quiesce_admin_queue(&ctrl->ctrl); nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_loop_destroy_admin_queue(ctrl); +out_remove_admin_tagset: + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_queues: kfree(ctrl->queues); out_uninit_ctrl: diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index e6e2c3f9afdf..ac26f4f774c4 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1598,8 +1598,10 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, pending++; } mutex_unlock(&nvmet_rdma_queue_mutex); - if (pending > NVMET_RDMA_BACKLOG) - return NVME_SC_CONNECT_CTRL_BUSY; + if (pending > NVMET_RDMA_BACKLOG) { + ret = NVME_SC_CONNECT_CTRL_BUSY; + goto put_device; + } } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 164a564ba3b4..3568fa9a0905 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1842,10 +1842,11 @@ static void nvmet_tcp_tls_handshake_done(void *data, int status, if (!status) status = nvmet_tcp_tls_key_lookup(queue, peerid); + if (!status) + status = nvmet_tcp_set_queue_sock(queue); + if (status) nvmet_tcp_schedule_release_queue(queue); - else - nvmet_tcp_set_queue_sock(queue); kref_put(&queue->kref, nvmet_tcp_release_queue); } @@ -1997,6 +1998,12 @@ out_free_connect: nvmet_tcp_free_cmd(&queue->connect); out_ida_remove: ida_free(&nvmet_tcp_queue_ida, queue->idx); + /* + * Drain the page fragment cache if any allocations were done. + * The first allocation using pf_cache is nvmet_tcp_alloc_cmd() + * for queue->connect after ida_alloc(). + */ + page_frag_cache_drain(&queue->pf_cache); out_sock: fput(queue->sock->file); out_free_queue: |
