diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 224 |
1 files changed, 128 insertions, 96 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 60f14019f981..0af612387083 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -20,6 +20,7 @@ #include <linux/ptrace.h> #include <linux/nvme_ioctl.h> #include <linux/pm_qos.h> +#include <linux/ratelimit.h> #include <asm/unaligned.h> #include "nvme.h" @@ -312,12 +313,12 @@ static void nvme_log_error(struct request *req) struct nvme_request *nr = nvme_req(req); if (ns) { - pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", + pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n", ns->disk ? ns->disk->disk_name : "?", nvme_get_opcode_str(nr->cmd->common.opcode), nr->cmd->common.opcode, - (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)), - (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift, + nvme_sect_to_lba(ns->head, blk_rq_pos(req)), + blk_rq_bytes(req) >> ns->head->lba_shift, nvme_get_error_status_str(nr->status), nr->status >> 8 & 7, /* Status Code Type */ nr->status & 0xff, /* Status Code */ @@ -372,9 +373,12 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) static inline void nvme_end_req_zoned(struct request *req) { if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - req_op(req) == REQ_OP_ZONE_APPEND) - req->__sector = nvme_lba_to_sect(req->q->queuedata, + req_op(req) == REQ_OP_ZONE_APPEND) { + struct nvme_ns *ns = req->q->queuedata; + + req->__sector = nvme_lba_to_sect(ns->head, le64_to_cpu(nvme_req(req)->result.u64)); + } } static inline void nvme_end_req(struct request *req) @@ -793,8 +797,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } if (queue_max_discard_segments(req->q) == 1) { - u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req)); - u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9); + u64 slba = nvme_sect_to_lba(ns->head, blk_rq_pos(req)); + u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9); range[0].cattr = cpu_to_le32(0); range[0].nlb = cpu_to_le32(nlb); @@ -802,8 +806,9 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, n = 1; } else { __rq_for_each_bio(bio, req) { - u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); - u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; + u64 slba = nvme_sect_to_lba(ns->head, + bio->bi_iter.bi_sector); + u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift; if (n < segments) { range[n].cattr = cpu_to_le32(0); @@ -841,7 +846,7 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd, u64 ref48; /* both rw and write zeroes share the same reftag format */ - switch (ns->guard_type) { + switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD: cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); break; @@ -869,17 +874,18 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); cmnd->write_zeroes.length = - cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); - if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC)) + if (!(req->cmd_flags & REQ_NOUNMAP) && + (ns->head->features & NVME_NS_DEAC)) cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC); - if (nvme_ns_has_pi(ns)) { + if (nvme_ns_has_pi(ns->head)) { cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT); - switch (ns->pi_type) { + switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: nvme_set_ref_tag(ns, cmnd, req); @@ -911,13 +917,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.cdw2 = 0; cmnd->rw.cdw3 = 0; cmnd->rw.metadata = 0; - cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); - cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cmnd->rw.slba = + cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req))); + cmnd->rw.length = + cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1); cmnd->rw.reftag = 0; cmnd->rw.apptag = 0; cmnd->rw.appmask = 0; - if (ns->ms) { + if (ns->head->ms) { /* * If formated with metadata, the block layer always provides a * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else @@ -925,12 +933,12 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, * namespace capacity to zero to prevent any I/O. */ if (!blk_integrity_rq(req)) { - if (WARN_ON_ONCE(!nvme_ns_has_pi(ns))) + if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head))) return BLK_STS_NOTSUPP; control |= NVME_RW_PRINFO_PRACT; } - switch (ns->pi_type) { + switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE3: control |= NVME_RW_PRINFO_PRCHK_GUARD; break; @@ -1452,7 +1460,7 @@ free_data: return status; } -static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, +int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_id_ns **id) { struct nvme_command c = { }; @@ -1671,14 +1679,14 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, - u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, + struct nvme_ns_head *head, u32 max_integrity_segments) { struct blk_integrity integrity = { }; - switch (ns->pi_type) { + switch (head->pi_type) { case NVME_NS_DPS_PI_TYPE3: - switch (ns->guard_type) { + switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type3_crc; integrity.tag_size = sizeof(u16) + sizeof(u32); @@ -1696,7 +1704,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: - switch (ns->guard_type) { + switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: integrity.profile = &t10_pi_type1_crc; integrity.tag_size = sizeof(u16); @@ -1717,25 +1725,26 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, break; } - integrity.tuple_size = ns->ms; + integrity.tuple_size = head->ms; blk_integrity_register(disk, &integrity); blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns, - u32 max_integrity_segments) +static void nvme_init_integrity(struct gendisk *disk, + struct nvme_ns_head *head, u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) +static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, + struct nvme_ns_head *head) { - struct nvme_ctrl *ctrl = ns->ctrl; struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); - if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) - ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) + ctrl->max_discard_sectors = + nvme_lba_to_sect(head, ctrl->dmrsl); if (ctrl->max_discard_sectors == 0) { blk_queue_max_discard_sectors(queue, 0); @@ -1766,21 +1775,21 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) a->csi == b->csi; } -static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, + struct nvme_id_ns *id) { bool first = id->dps & NVME_NS_DPS_PI_FIRST; unsigned lbaf = nvme_lbaf_index(id->flbas); - struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_command c = { }; struct nvme_id_ns_nvm *nvm; int ret = 0; u32 elbaf; - ns->pi_size = 0; - ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); + head->pi_size = 0; + head->ms = le16_to_cpu(id->lbaf[lbaf].ms); if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) { - ns->pi_size = sizeof(struct t10_pi_tuple); - ns->guard_type = NVME_NVM_NS_16B_GUARD; + head->pi_size = sizeof(struct t10_pi_tuple); + head->guard_type = NVME_NVM_NS_16B_GUARD; goto set_pi; } @@ -1789,11 +1798,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) return -ENOMEM; c.identify.opcode = nvme_admin_identify; - c.identify.nsid = cpu_to_le32(ns->head->ns_id); + c.identify.nsid = cpu_to_le32(head->ns_id); c.identify.cns = NVME_ID_CNS_CS_NS; c.identify.csi = NVME_CSI_NVM; - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm)); + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); if (ret) goto free_data; @@ -1803,13 +1812,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_elbaf_sts(elbaf)) goto free_data; - ns->guard_type = nvme_elbaf_guard_type(elbaf); - switch (ns->guard_type) { + head->guard_type = nvme_elbaf_guard_type(elbaf); + switch (head->guard_type) { case NVME_NVM_NS_64B_GUARD: - ns->pi_size = sizeof(struct crc64_pi_tuple); + head->pi_size = sizeof(struct crc64_pi_tuple); break; case NVME_NVM_NS_16B_GUARD: - ns->pi_size = sizeof(struct t10_pi_tuple); + head->pi_size = sizeof(struct t10_pi_tuple); break; default: break; @@ -1818,25 +1827,25 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id) free_data: kfree(nvm); set_pi: - if (ns->pi_size && (first || ns->ms == ns->pi_size)) - ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; + if (head->pi_size && (first || head->ms == head->pi_size)) + head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else - ns->pi_type = 0; + head->pi_type = 0; return ret; } -static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) +static int nvme_configure_metadata(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head, struct nvme_id_ns *id) { - struct nvme_ctrl *ctrl = ns->ctrl; int ret; - ret = nvme_init_ms(ns, id); + ret = nvme_init_ms(ctrl, head, id); if (ret) return ret; - ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); - if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) + head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return 0; if (ctrl->ops->flags & NVME_F_FABRICS) { @@ -1848,7 +1857,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return 0; - ns->features |= NVME_NS_EXT_LBAS; + head->features |= NVME_NS_EXT_LBAS; /* * The current fabrics transport drivers support namespace @@ -1859,8 +1868,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * Note, this check will need to be modified if any drivers * gain the ability to use other metadata formats. */ - if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) - ns->features |= NVME_NS_METADATA_SUPPORTED; + if (ctrl->max_integrity_segments && nvme_ns_has_pi(head)) + head->features |= NVME_NS_METADATA_SUPPORTED; } else { /* * For PCIe controllers, we can't easily remap the separate @@ -1869,9 +1878,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) * We allow extended LBAs for the passthrough interface, though. */ if (id->flbas & NVME_NS_FLBAS_META_EXT) - ns->features |= NVME_NS_EXT_LBAS; + head->features |= NVME_NS_EXT_LBAS; else - ns->features |= NVME_NS_METADATA_SUPPORTED; + head->features |= NVME_NS_METADATA_SUPPORTED; } return 0; } @@ -1894,11 +1903,11 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } -static void nvme_update_disk_info(struct gendisk *disk, - struct nvme_ns *ns, struct nvme_id_ns *id) +static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, + struct nvme_ns_head *head, struct nvme_id_ns *id) { - sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); - u32 bs = 1U << ns->lba_shift; + sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); + u32 bs = 1U << head->lba_shift; u32 atomic_bs, phys_bs, io_opt = 0; /* @@ -1906,7 +1915,7 @@ static void nvme_update_disk_info(struct gendisk *disk, * or smaller than a sector size yet, so catch this early and don't * allow block I/O. */ - if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) { + if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) { capacity = 0; bs = (1 << 9); } @@ -1923,7 +1932,7 @@ static void nvme_update_disk_info(struct gendisk *disk, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ctrl->subsys->awupf) * bs; } if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { @@ -1949,20 +1958,20 @@ static void nvme_update_disk_info(struct gendisk *disk, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (ns->ms) { + if (head->ms) { if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && - (ns->features & NVME_NS_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns, - ns->ctrl->max_integrity_segments); - else if (!nvme_ns_has_pi(ns)) + (head->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, head, + ctrl->max_integrity_segments); + else if (!nvme_ns_has_pi(head)) capacity = 0; } set_capacity_and_notify(disk, capacity); - nvme_config_discard(disk, ns); + nvme_config_discard(ctrl, disk, head); blk_queue_max_write_zeroes_sectors(disk->queue, - ns->ctrl->max_zeroes_sectors); + ctrl->max_zeroes_sectors); } static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) @@ -1985,7 +1994,7 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) is_power_of_2(ctrl->max_hw_sectors)) iob = ctrl->max_hw_sectors; else - iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + iob = nvme_lba_to_sect(ns->head, le16_to_cpu(id->noiob)); if (!iob) return; @@ -2052,16 +2061,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); - ns->lba_shift = id->lbaf[lbaf].ds; + ns->head->lba_shift = id->lbaf[lbaf].ds; + ns->head->nuse = le64_to_cpu(id->nuse); nvme_set_queue_limits(ns->ctrl, ns->queue); - ret = nvme_configure_metadata(ns, id); + ret = nvme_configure_metadata(ns->ctrl, ns->head, id); if (ret < 0) { blk_mq_unfreeze_queue(ns->disk->queue); goto out; } nvme_set_chunk_sectors(ns, id); - nvme_update_disk_info(ns->disk, ns, id); + nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); @@ -2078,7 +2088,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * do not return zeroes. */ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) - ns->features |= NVME_NS_DEAC; + ns->head->features |= NVME_NS_DEAC; set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -2091,7 +2101,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); - nvme_update_disk_info(ns->head->disk, ns, id); + nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id); set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, @@ -3026,6 +3036,42 @@ static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return 0; } +static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + /* + * In fabrics we need to verify the cntlid matches the + * admin connect + */ + if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { + dev_err(ctrl->device, + "Mismatching cntlid: Connect %u vs Identify %u, rejecting\n", + ctrl->cntlid, le16_to_cpu(id->cntlid)); + return -EINVAL; + } + + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { + dev_err(ctrl->device, + "keep-alive support is mandatory for fabrics\n"); + return -EINVAL; + } + + if (!nvme_discovery_ctrl(ctrl) && ctrl->ioccsz < 4) { + dev_err(ctrl->device, + "I/O queue command capsule supported size %d < 4\n", + ctrl->ioccsz); + return -EINVAL; + } + + if (!nvme_discovery_ctrl(ctrl) && ctrl->iorcsz < 1) { + dev_err(ctrl->device, + "I/O queue response capsule supported size %d < 1\n", + ctrl->iorcsz); + return -EINVAL; + } + + return 0; +} + static int nvme_init_identify(struct nvme_ctrl *ctrl) { struct nvme_id_ctrl *id; @@ -3138,25 +3184,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->iorcsz = le32_to_cpu(id->iorcsz); ctrl->maxcmd = le16_to_cpu(id->maxcmd); - /* - * In fabrics we need to verify the cntlid matches the - * admin connect - */ - if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { - dev_err(ctrl->device, - "Mismatching cntlid: Connect %u vs Identify " - "%u, rejecting\n", - ctrl->cntlid, le16_to_cpu(id->cntlid)); - ret = -EINVAL; - goto out_free; - } - - if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { - dev_err(ctrl->device, - "keep-alive support is mandatory for fabrics\n"); - ret = -EINVAL; + ret = nvme_check_ctrl_fabric_info(ctrl, id); + if (ret) goto out_free; - } } else { ctrl->hmpre = le32_to_cpu(id->hmpre); ctrl->hmmin = le32_to_cpu(id->hmmin); @@ -3415,6 +3445,8 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ns_id = info->nsid; head->ids = info->ids; head->shared = info->is_shared; + ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1); + ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE); kref_init(&head->ref); if (head->ids.csi) { @@ -3674,7 +3706,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) up_write(&ctrl->namespaces_rwsem); nvme_get_ctrl(ctrl); - if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) + if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups)) goto out_cleanup_ns_from_list; if (!nvme_ns_head_multipath(ns->head)) |