author		Linus Torvalds <torvalds@linux-foundation.org>	2024-01-12 00:58:04 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-01-12 00:58:04 +0300
commit		01d550f0fcc06c7292f79a6f1453aac122d1d2c8 (patch)
tree		58b58ac1cb833af0469b1942774a382633bc6cda /drivers/nvme
parent		d05e626603d57936314816433db8bf1d34b5a504 (diff)
parent		587371ed783b046f22ba7a5e1cc9a19ae35123b4 (diff)
download	linux-01d550f0fcc06c7292f79a6f1453aac122d1d2c8.tar.xz
Merge tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
"Pretty quiet round this time around. This contains:
- NVMe updates via Keith:
- nvme fabrics spec updates (Guixin, Max)
- nvme target updates (Guixin, Evan)
- nvme attribute refactoring (Daniel)
- nvme-fc numa fix (Keith)
- MD updates via Song:
- Fix/Cleanup RCU usage from conf->disks[i].rdev (Yu Kuai)
- Fix raid5 hang issue (Junxiao Bi)
- Add Yu Kuai as Reviewer of the md subsystem
- Remove deprecated flavors (Song Liu)
- raid1 read error check support (Li Nan)
- Better handle events off-by-1 case (Alex Lyakas)
- Efficiency improvements for passthrough (Kundan)
- Support for mapping integrity data directly (Keith)
- Zoned write fix (Damien)
- rnbd fixes (Kees, Santosh, Supriti)
- Default to a sane discard size granularity (Christoph)
- Make the default max transfer size naming less confusing
(Christoph)
- Remove support for deprecated host aware zoned model (Christoph)
- Misc fixes (me, Li, Matthew, Min, Ming, Randy, liyouhong, Daniel,
Bart, Christoph)"
* tag 'for-6.8/block-2024-01-08' of git://git.kernel.dk/linux: (78 commits)
block: Treat sequential write preferred zone type as invalid
block: remove disk_clear_zoned
sd: remove the !ZBC && blk_queue_is_zoned case in sd_read_block_characteristics
drivers/block/xen-blkback/common.h: Fix spelling typo in comment
blk-cgroup: fix rcu lockdep warning in blkg_lookup()
blk-cgroup: don't use removal safe list iterators
block: floor the discard granularity to the physical block size
mtd_blkdevs: use the default discard granularity
bcache: use the default discard granularity
zram: use the default discard granularity
null_blk: use the default discard granularity
nbd: use the default discard granularity
ubd: use the default discard granularity
block: default the discard granularity to sector size
bcache: discard_granularity should not be smaller than a sector
block: remove two comments in bio_split_discard
block: rename and document BLK_DEF_MAX_SECTORS
loop: don't abuse BLK_DEF_MAX_SECTORS
aoe: don't abuse BLK_DEF_MAX_SECTORS
null_blk: don't cap max_hw_sectors to BLK_DEF_MAX_SECTORS
...
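
Several of the commits above ("use the default discard granularity", "floor the discard granularity to the physical block size") move discard-granularity policy out of individual drivers and into the block core. A standalone model of that policy, assuming the behavior described in the log rather than the actual block layer API:

    #include <stdint.h>
    #include <stdio.h>

    /* Drivers that leave discard_granularity at 0 now inherit the logical
     * block size, floored to the physical block size so a discard never
     * covers a fraction of a physical block. Names are illustrative. */
    static uint32_t discard_granularity(uint32_t driver_value,
                                        uint32_t logical_bs,
                                        uint32_t physical_bs)
    {
            uint32_t g = driver_value ? driver_value : logical_bs;

            return g < physical_bs ? physical_bs : g;
    }

    int main(void)
    {
            /* 512e drive: 512B logical sectors on 4096B physical sectors. */
            printf("%u\n", discard_granularity(0, 512, 4096));    /* 4096 */
            printf("%u\n", discard_granularity(8192, 512, 4096)); /* 8192 */
            return 0;
    }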
Diffstat (limited to 'drivers/nvme')
-rw-r--r--	drivers/nvme/host/core.c	224
-rw-r--r--	drivers/nvme/host/fc.c	6
-rw-r--r--	drivers/nvme/host/ioctl.c	205
-rw-r--r--	drivers/nvme/host/multipath.c	2
-rw-r--r--	drivers/nvme/host/nvme.h	44
-rw-r--r--	drivers/nvme/host/rdma.c	4
-rw-r--r--	drivers/nvme/host/sysfs.c	99
-rw-r--r--	drivers/nvme/host/zns.c	37
-rw-r--r--	drivers/nvme/target/configfs.c	4
-rw-r--r--	drivers/nvme/target/core.c	3
-rw-r--r--	drivers/nvme/target/passthru.c	4
11 files changed, 307 insertions, 325 deletions
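
Most of the churn in core.c and nvme.h below comes from one refactor: the namespace format fields (lba_shift, ms, pi_type, guard_type, zsze, features, ...) move from the per-path struct nvme_ns into the shared struct nvme_ns_head, so helpers such as nvme_sect_to_lba() and nvme_lba_to_sect() now take the head. The conversion math itself is unchanged; a self-contained illustration with lba_shift passed directly instead of read from a head:

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9  /* 512-byte block layer sectors, as in the kernel */

    /* Same shift arithmetic as nvme_sect_to_lba()/nvme_lba_to_sect(). */
    static uint64_t sect_to_lba(int lba_shift, uint64_t sector)
    {
            return sector >> (lba_shift - SECTOR_SHIFT);
    }

    static uint64_t lba_to_sect(int lba_shift, uint64_t lba)
    {
            return lba << (lba_shift - SECTOR_SHIFT);
    }

    int main(void)
    {
            /* A 4KiB-formatted namespace has lba_shift == 12. */
            printf("%llu\n", (unsigned long long)sect_to_lba(12, 4096)); /* 512 */
            printf("%llu\n", (unsigned long long)lba_to_sect(12, 512));  /* 4096 */
            return 0;
    }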
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 60f14019f981..0af612387083 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -20,6 +20,7 @@
 #include <linux/ptrace.h>
 #include <linux/nvme_ioctl.h>
 #include <linux/pm_qos.h>
+#include <linux/ratelimit.h>
 #include <asm/unaligned.h>
 
 #include "nvme.h"
@@ -312,12 +313,12 @@ static void nvme_log_error(struct request *req)
 	struct nvme_request *nr = nvme_req(req);
 
 	if (ns) {
-		pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %llu blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
+		pr_err_ratelimited("%s: %s(0x%x) @ LBA %llu, %u blocks, %s (sct 0x%x / sc 0x%x) %s%s\n",
 		       ns->disk ? ns->disk->disk_name : "?",
 		       nvme_get_opcode_str(nr->cmd->common.opcode),
 		       nr->cmd->common.opcode,
-		       (unsigned long long)nvme_sect_to_lba(ns, blk_rq_pos(req)),
-		       (unsigned long long)blk_rq_bytes(req) >> ns->lba_shift,
+		       nvme_sect_to_lba(ns->head, blk_rq_pos(req)),
+		       blk_rq_bytes(req) >> ns->head->lba_shift,
 		       nvme_get_error_status_str(nr->status),
 		       nr->status >> 8 & 7,	/* Status Code Type */
 		       nr->status & 0xff,	/* Status Code */
@@ -372,9 +373,12 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
 static inline void nvme_end_req_zoned(struct request *req)
 {
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
-	    req_op(req) == REQ_OP_ZONE_APPEND)
-		req->__sector = nvme_lba_to_sect(req->q->queuedata,
+	    req_op(req) == REQ_OP_ZONE_APPEND) {
+		struct nvme_ns *ns = req->q->queuedata;
+
+		req->__sector = nvme_lba_to_sect(ns->head,
 			le64_to_cpu(nvme_req(req)->result.u64));
+	}
 }
 
 static inline void nvme_end_req(struct request *req)
@@ -793,8 +797,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	}
 
 	if (queue_max_discard_segments(req->q) == 1) {
-		u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
-		u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
+		u64 slba = nvme_sect_to_lba(ns->head, blk_rq_pos(req));
+		u32 nlb = blk_rq_sectors(req) >> (ns->head->lba_shift - 9);
 
 		range[0].cattr = cpu_to_le32(0);
 		range[0].nlb = cpu_to_le32(nlb);
@@ -802,8 +806,9 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		n = 1;
 	} else {
 		__rq_for_each_bio(bio, req) {
-			u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
-			u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+			u64 slba = nvme_sect_to_lba(ns->head,
+					bio->bi_iter.bi_sector);
+			u32 nlb = bio->bi_iter.bi_size >> ns->head->lba_shift;
 
 			if (n < segments) {
 				range[n].cattr = cpu_to_le32(0);
@@ -841,7 +846,7 @@ static void nvme_set_ref_tag(struct nvme_ns *ns, struct nvme_command *cmnd,
 	u64 ref48;
 
 	/* both rw and write zeroes share the same reftag format */
-	switch (ns->guard_type) {
+	switch (ns->head->guard_type) {
 	case NVME_NVM_NS_16B_GUARD:
 		cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 		break;
@@ -869,17 +874,18 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
 	cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
 	cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
 	cmnd->write_zeroes.slba =
-		cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
+		cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
 	cmnd->write_zeroes.length =
-		cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+		cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1);
 
-	if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+	if (!(req->cmd_flags & REQ_NOUNMAP) &&
+	    (ns->head->features & NVME_NS_DEAC))
 		cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
 
-	if (nvme_ns_has_pi(ns)) {
+	if (nvme_ns_has_pi(ns->head)) {
 		cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
 
-		switch (ns->pi_type) {
+		switch (ns->head->pi_type) {
 		case NVME_NS_DPS_PI_TYPE1:
 		case NVME_NS_DPS_PI_TYPE2:
 			nvme_set_ref_tag(ns, cmnd, req);
@@ -911,13 +917,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	cmnd->rw.cdw2 = 0;
 	cmnd->rw.cdw3 = 0;
 	cmnd->rw.metadata = 0;
-	cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
-	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+	cmnd->rw.slba =
+		cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
+	cmnd->rw.length =
+		cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1);
 	cmnd->rw.reftag = 0;
 	cmnd->rw.apptag = 0;
 	cmnd->rw.appmask = 0;
 
-	if (ns->ms) {
+	if (ns->head->ms) {
 		/*
 		 * If formated with metadata, the block layer always provides a
 		 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
@@ -925,12 +933,12 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		 * namespace capacity to zero to prevent any I/O.
 		 */
 		if (!blk_integrity_rq(req)) {
-			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
+			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
 		}
 
-		switch (ns->pi_type) {
+		switch (ns->head->pi_type) {
 		case NVME_NS_DPS_PI_TYPE3:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD;
 			break;
@@ -1452,7 +1460,7 @@ free_data:
 	return status;
 }
 
-static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		struct nvme_id_ns **id)
 {
 	struct nvme_command c = { };
@@ -1671,14 +1679,14 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
-		u32 max_integrity_segments)
+static void nvme_init_integrity(struct gendisk *disk,
+		struct nvme_ns_head *head, u32 max_integrity_segments)
 {
 	struct blk_integrity integrity = { };
 
-	switch (ns->pi_type) {
+	switch (head->pi_type) {
 	case NVME_NS_DPS_PI_TYPE3:
-		switch (ns->guard_type) {
+		switch (head->guard_type) {
 		case NVME_NVM_NS_16B_GUARD:
 			integrity.profile = &t10_pi_type3_crc;
 			integrity.tag_size = sizeof(u16) + sizeof(u32);
@@ -1696,7 +1704,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
 		break;
 	case NVME_NS_DPS_PI_TYPE1:
 	case NVME_NS_DPS_PI_TYPE2:
-		switch (ns->guard_type) {
+		switch (head->guard_type) {
 		case NVME_NVM_NS_16B_GUARD:
 			integrity.profile = &t10_pi_type1_crc;
 			integrity.tag_size = sizeof(u16);
@@ -1717,25 +1725,26 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
 		break;
 	}
 
-	integrity.tuple_size = ns->ms;
+	integrity.tuple_size = head->ms;
 	blk_integrity_register(disk, &integrity);
 	blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
 }
 #else
-static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
-		u32 max_integrity_segments)
+static void nvme_init_integrity(struct gendisk *disk,
+		struct nvme_ns_head *head, u32 max_integrity_segments)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
+static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
+		struct nvme_ns_head *head)
 {
-	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct request_queue *queue = disk->queue;
 	u32 size = queue_logical_block_size(queue);
 
-	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
-		ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
+	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX))
+		ctrl->max_discard_sectors =
+			nvme_lba_to_sect(head, ctrl->dmrsl);
 
 	if (ctrl->max_discard_sectors == 0) {
 		blk_queue_max_discard_sectors(queue, 0);
@@ -1766,21 +1775,21 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 		a->csi == b->csi;
 }
 
-static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
+static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
+		struct nvme_id_ns *id)
 {
 	bool first = id->dps & NVME_NS_DPS_PI_FIRST;
 	unsigned lbaf = nvme_lbaf_index(id->flbas);
-	struct nvme_ctrl *ctrl = ns->ctrl;
 	struct nvme_command c = { };
 	struct nvme_id_ns_nvm *nvm;
 	int ret = 0;
 	u32 elbaf;
 
-	ns->pi_size = 0;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	head->pi_size = 0;
+	head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
 	if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
-		ns->pi_size = sizeof(struct t10_pi_tuple);
-		ns->guard_type = NVME_NVM_NS_16B_GUARD;
+		head->pi_size = sizeof(struct t10_pi_tuple);
+		head->guard_type = NVME_NVM_NS_16B_GUARD;
 		goto set_pi;
 	}
@@ -1789,11 +1798,11 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 		return -ENOMEM;
 
 	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
+	c.identify.nsid = cpu_to_le32(head->ns_id);
 	c.identify.cns = NVME_ID_CNS_CS_NS;
 	c.identify.csi = NVME_CSI_NVM;
 
-	ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, nvm, sizeof(*nvm));
+	ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm));
 	if (ret)
 		goto free_data;
@@ -1803,13 +1812,13 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 	if (nvme_elbaf_sts(elbaf))
 		goto free_data;
 
-	ns->guard_type = nvme_elbaf_guard_type(elbaf);
-	switch (ns->guard_type) {
+	head->guard_type = nvme_elbaf_guard_type(elbaf);
+	switch (head->guard_type) {
 	case NVME_NVM_NS_64B_GUARD:
-		ns->pi_size = sizeof(struct crc64_pi_tuple);
+		head->pi_size = sizeof(struct crc64_pi_tuple);
 		break;
 	case NVME_NVM_NS_16B_GUARD:
-		ns->pi_size = sizeof(struct t10_pi_tuple);
+		head->pi_size = sizeof(struct t10_pi_tuple);
 		break;
 	default:
 		break;
@@ -1818,25 +1827,25 @@ static int nvme_init_ms(struct nvme_ns *ns, struct nvme_id_ns *id)
 free_data:
 	kfree(nvm);
set_pi:
-	if (ns->pi_size && (first || ns->ms == ns->pi_size))
-		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+	if (head->pi_size && (first || head->ms == head->pi_size))
+		head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
 	else
-		ns->pi_type = 0;
+		head->pi_type = 0;
 
 	return ret;
 }
 
-static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
+		struct nvme_ns_head *head, struct nvme_id_ns *id)
 {
-	struct nvme_ctrl *ctrl = ns->ctrl;
 	int ret;
 
-	ret = nvme_init_ms(ns, id);
+	ret = nvme_init_ms(ctrl, head, id);
 	if (ret)
 		return ret;
 
-	ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
-	if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+	head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
+	if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		return 0;
 
 	if (ctrl->ops->flags & NVME_F_FABRICS) {
@@ -1848,7 +1857,7 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
 			return 0;
 
-		ns->features |= NVME_NS_EXT_LBAS;
+		head->features |= NVME_NS_EXT_LBAS;
 
 		/*
 		 * The current fabrics transport drivers support namespace
@@ -1859,8 +1868,8 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		 * Note, this check will need to be modified if any drivers
 		 * gain the ability to use other metadata formats.
 		 */
-		if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns))
-			ns->features |= NVME_NS_METADATA_SUPPORTED;
+		if (ctrl->max_integrity_segments && nvme_ns_has_pi(head))
+			head->features |= NVME_NS_METADATA_SUPPORTED;
 	} else {
 		/*
 		 * For PCIe controllers, we can't easily remap the separate
@@ -1869,9 +1878,9 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 		 * We allow extended LBAs for the passthrough interface, though.
 		 */
 		if (id->flbas & NVME_NS_FLBAS_META_EXT)
-			ns->features |= NVME_NS_EXT_LBAS;
+			head->features |= NVME_NS_EXT_LBAS;
 		else
-			ns->features |= NVME_NS_METADATA_SUPPORTED;
+			head->features |= NVME_NS_METADATA_SUPPORTED;
 	}
 	return 0;
 }
@@ -1894,11 +1903,11 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	blk_queue_write_cache(q, vwc, vwc);
 }
 
-static void nvme_update_disk_info(struct gendisk *disk,
-		struct nvme_ns *ns, struct nvme_id_ns *id)
+static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
+		struct nvme_ns_head *head, struct nvme_id_ns *id)
 {
-	sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
-	u32 bs = 1U << ns->lba_shift;
+	sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze));
+	u32 bs = 1U << head->lba_shift;
 	u32 atomic_bs, phys_bs, io_opt = 0;
 
 	/*
@@ -1906,7 +1915,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	 * or smaller than a sector size yet, so catch this early and don't
 	 * allow block I/O.
 	 */
-	if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) {
+	if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
 		capacity = 0;
 		bs = (1 << 9);
 	}
@@ -1923,7 +1932,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
 			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
 		else
-			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+			atomic_bs = (1 + ctrl->subsys->awupf) * bs;
 	}
 
 	if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
@@ -1949,20 +1958,20 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	 * I/O to namespaces with metadata except when the namespace supports
 	 * PI, as it can strip/insert in that case.
 	 */
-	if (ns->ms) {
+	if (head->ms) {
 		if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
-		    (ns->features & NVME_NS_METADATA_SUPPORTED))
-			nvme_init_integrity(disk, ns,
-					    ns->ctrl->max_integrity_segments);
-		else if (!nvme_ns_has_pi(ns))
+		    (head->features & NVME_NS_METADATA_SUPPORTED))
+			nvme_init_integrity(disk, head,
+					    ctrl->max_integrity_segments);
+		else if (!nvme_ns_has_pi(head))
 			capacity = 0;
 	}
 
 	set_capacity_and_notify(disk, capacity);
 
-	nvme_config_discard(disk, ns);
+	nvme_config_discard(ctrl, disk, head);
 	blk_queue_max_write_zeroes_sectors(disk->queue,
-					   ns->ctrl->max_zeroes_sectors);
+					   ctrl->max_zeroes_sectors);
 }
 
 static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
@@ -1985,7 +1994,7 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
 	    is_power_of_2(ctrl->max_hw_sectors))
 		iob = ctrl->max_hw_sectors;
 	else
-		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+		iob = nvme_lba_to_sect(ns->head, le16_to_cpu(id->noiob));
 
 	if (!iob)
 		return;
@@ -2052,16 +2061,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 
 	blk_mq_freeze_queue(ns->disk->queue);
 	lbaf = nvme_lbaf_index(id->flbas);
-	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->head->lba_shift = id->lbaf[lbaf].ds;
+	ns->head->nuse = le64_to_cpu(id->nuse);
 	nvme_set_queue_limits(ns->ctrl, ns->queue);
 
-	ret = nvme_configure_metadata(ns, id);
+	ret = nvme_configure_metadata(ns->ctrl, ns->head, id);
 	if (ret < 0) {
 		blk_mq_unfreeze_queue(ns->disk->queue);
 		goto out;
 	}
 	nvme_set_chunk_sectors(ns, id);
-	nvme_update_disk_info(ns->disk, ns, id);
+	nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id);
 
 	if (ns->head->ids.csi == NVME_CSI_ZNS) {
 		ret = nvme_update_zone_info(ns, lbaf);
@@ -2078,7 +2088,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	 * do not return zeroes.
 	 */
 	if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
-		ns->features |= NVME_NS_DEAC;
+		ns->head->features |= NVME_NS_DEAC;
 	set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
 	set_bit(NVME_NS_READY, &ns->flags);
 	blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2091,7 +2101,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 
 	if (nvme_ns_head_multipath(ns->head)) {
 		blk_mq_freeze_queue(ns->head->disk->queue);
-		nvme_update_disk_info(ns->head->disk, ns, id);
+		nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id);
 		set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
 		nvme_mpath_revalidate_paths(ns);
 		blk_stack_limits(&ns->head->disk->queue->limits,
@@ -3026,6 +3036,42 @@ static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	return 0;
 }
 
+static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	/*
+	 * In fabrics we need to verify the cntlid matches the
+	 * admin connect
+	 */
+	if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
+		dev_err(ctrl->device,
+			"Mismatching cntlid: Connect %u vs Identify %u, rejecting\n",
+			ctrl->cntlid, le16_to_cpu(id->cntlid));
+		return -EINVAL;
+	}
+
+	if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) {
+		dev_err(ctrl->device,
+			"keep-alive support is mandatory for fabrics\n");
+		return -EINVAL;
+	}
+
+	if (!nvme_discovery_ctrl(ctrl) && ctrl->ioccsz < 4) {
+		dev_err(ctrl->device,
+			"I/O queue command capsule supported size %d < 4\n",
+			ctrl->ioccsz);
+		return -EINVAL;
+	}
+
+	if (!nvme_discovery_ctrl(ctrl) && ctrl->iorcsz < 1) {
+		dev_err(ctrl->device,
+			"I/O queue response capsule supported size %d < 1\n",
+			ctrl->iorcsz);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nvme_init_identify(struct nvme_ctrl *ctrl)
 {
 	struct nvme_id_ctrl *id;
@@ -3138,25 +3184,9 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->iorcsz = le32_to_cpu(id->iorcsz);
 		ctrl->maxcmd = le16_to_cpu(id->maxcmd);
 
-		/*
-		 * In fabrics we need to verify the cntlid matches the
-		 * admin connect
-		 */
-		if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
-			dev_err(ctrl->device,
-				"Mismatching cntlid: Connect %u vs Identify "
-				"%u, rejecting\n",
-				ctrl->cntlid, le16_to_cpu(id->cntlid));
-			ret = -EINVAL;
-			goto out_free;
-		}
-
-		if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) {
-			dev_err(ctrl->device,
-				"keep-alive support is mandatory for fabrics\n");
-			ret = -EINVAL;
+		ret = nvme_check_ctrl_fabric_info(ctrl, id);
+		if (ret)
 			goto out_free;
-		}
 	} else {
 		ctrl->hmpre = le32_to_cpu(id->hmpre);
 		ctrl->hmmin = le32_to_cpu(id->hmmin);
@@ -3415,6 +3445,8 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->ns_id = info->nsid;
 	head->ids = info->ids;
 	head->shared = info->is_shared;
+	ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
+	ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
 	kref_init(&head->ref);
 
 	if (head->ids.csi) {
@@ -3674,7 +3706,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 	up_write(&ctrl->namespaces_rwsem);
 	nvme_get_ctrl(ctrl);
 
-	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
+	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
 		goto out_cleanup_ns_from_list;
 
 	if (!nvme_ns_head_multipath(ns->head))
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 043f2d7798c1..16847a316421 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3498,10 +3498,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->ctrl.opts = opts;
 	ctrl->ctrl.nr_reconnects = 0;
-	if (lport->dev)
-		ctrl->ctrl.numa_node = dev_to_node(lport->dev);
-	else
-		ctrl->ctrl.numa_node = NUMA_NO_NODE;
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
@@ -3546,6 +3542,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
 	if (ret)
 		goto out_free_queues;
+	if (lport->dev)
+		ctrl->ctrl.numa_node = dev_to_node(lport->dev);
 
 	/* at this point, teardown path changes to ref counting on nvme ctrl */
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 4939ed35638f..093e46f16e6c 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -97,58 +97,6 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
 	return (void __user *)ptrval;
 }
 
-static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
-		unsigned len, u32 seed)
-{
-	struct bio_integrity_payload *bip;
-	int ret = -ENOMEM;
-	void *buf;
-	struct bio *bio = req->bio;
-
-	buf = kmalloc(len, GFP_KERNEL);
-	if (!buf)
-		goto out;
-
-	if (req_op(req) == REQ_OP_DRV_OUT) {
-		ret = -EFAULT;
-		if (copy_from_user(buf, ubuf, len))
-			goto out_free_meta;
-	} else {
-		memset(buf, 0, len);
-	}
-
-	bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
-	if (IS_ERR(bip)) {
-		ret = PTR_ERR(bip);
-		goto out_free_meta;
-	}
-
-	bip->bip_iter.bi_sector = seed;
-	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
-			offset_in_page(buf));
-	if (ret != len) {
-		ret = -ENOMEM;
-		goto out_free_meta;
-	}
-
-	req->cmd_flags |= REQ_INTEGRITY;
-	return buf;
-out_free_meta:
-	kfree(buf);
-out:
-	return ERR_PTR(ret);
-}
-
-static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
-		void *meta, unsigned len, int ret)
-{
-	if (!ret && req_op(req) == REQ_OP_DRV_IN &&
-	    copy_to_user(ubuf, meta, len))
-		ret = -EFAULT;
-	kfree(meta);
-	return ret;
-}
-
 static struct request *nvme_alloc_user_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_opf_t rq_flags,
 		blk_mq_req_flags_t blk_flags)
@@ -165,14 +113,12 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
 
 static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
-		unsigned int flags)
+		u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
 {
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
 	struct bio *bio = NULL;
-	void *meta = NULL;
 	int ret;
 
 	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
@@ -194,18 +140,17 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	if (ret)
 		goto out;
+
 	bio = req->bio;
-	if (bdev)
+	if (bdev) {
 		bio_set_dev(bio, bdev);
-
-	if (bdev && meta_buffer && meta_len) {
-		meta = nvme_add_user_metadata(req, meta_buffer, meta_len,
-					      meta_seed);
-		if (IS_ERR(meta)) {
-			ret = PTR_ERR(meta);
-			goto out_unmap;
+		if (meta_buffer && meta_len) {
+			ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
+						     meta_seed);
+			if (ret)
+				goto out_unmap;
+			req->cmd_flags |= REQ_INTEGRITY;
 		}
-		*metap = meta;
 	}
 
 	return ret;
@@ -226,7 +171,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_ctrl *ctrl;
 	struct request *req;
-	void *meta = NULL;
 	struct bio *bio;
 	u32 effects;
 	int ret;
@@ -238,7 +182,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	req->timeout = timeout;
 	if (ubuffer && bufflen) {
 		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
-				meta_len, meta_seed, &meta, NULL, flags);
+				meta_len, meta_seed, NULL, flags);
 		if (ret)
 			return ret;
 	}
@@ -250,9 +194,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	ret = nvme_execute_rq(req, false);
 	if (result)
 		*result = le64_to_cpu(nvme_req(req)->result.u64);
-	if (meta)
-		ret = nvme_finish_user_metadata(req, meta_buffer, meta,
-						meta_len, ret);
 	if (bio)
 		blk_rq_unmap_user(bio);
 	blk_mq_free_request(req);
@@ -284,10 +225,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		return -EINVAL;
 	}
 
-	length = (io.nblocks + 1) << ns->lba_shift;
+	length = (io.nblocks + 1) << ns->head->lba_shift;
 
 	if ((io.control & NVME_RW_PRINFO_PRACT) &&
-	    ns->ms == sizeof(struct t10_pi_tuple)) {
+	    ns->head->ms == sizeof(struct t10_pi_tuple)) {
 		/*
 		 * Protection information is stripped/inserted by the
 		 * controller.
@@ -297,11 +238,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		meta_len = 0;
 		metadata = NULL;
 	} else {
-		meta_len = (io.nblocks + 1) * ns->ms;
+		meta_len = (io.nblocks + 1) * ns->head->ms;
 		metadata = nvme_to_user_ptr(io.metadata);
 	}
 
-	if (ns->features & NVME_NS_EXT_LBAS) {
+	if (ns->head->features & NVME_NS_EXT_LBAS) {
 		length += meta_len;
 		meta_len = 0;
 	} else if (meta_len) {
@@ -447,19 +388,10 @@ struct nvme_uring_data {
  * Expect build errors if this grows larger than that.
  */
 struct nvme_uring_cmd_pdu {
-	union {
-		struct bio *bio;
-		struct request *req;
-	};
-	u32 meta_len;
-	u32 nvme_status;
-	union {
-		struct {
-			void *meta; /* kernel-resident buffer */
-			void __user *meta_buffer;
-		};
-		u64 result;
-	} u;
+	struct request *req;
+	struct bio *bio;
+	u64 result;
+	int status;
 };
 
 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
@@ -468,31 +400,6 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
 	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
 }
 
-static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd,
-		unsigned issue_flags)
-{
-	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	struct request *req = pdu->req;
-	int status;
-	u64 result;
-
-	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
-		status = -EINTR;
-	else
-		status = nvme_req(req)->status;
-
-	result = le64_to_cpu(nvme_req(req)->result.u64);
-
-	if (pdu->meta_len)
-		status = nvme_finish_user_metadata(req, pdu->u.meta_buffer,
-					pdu->u.meta, pdu->meta_len, status);
-	if (req->bio)
-		blk_rq_unmap_user(req->bio);
-	blk_mq_free_request(req);
-
-	io_uring_cmd_done(ioucmd, status, result, issue_flags);
-}
-
 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
 		unsigned issue_flags)
 {
@@ -500,8 +407,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
 
 	if (pdu->bio)
 		blk_rq_unmap_user(pdu->bio);
-
-	io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags);
+	io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
 }
 
 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
@@ -510,53 +416,24 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 	struct io_uring_cmd *ioucmd = req->end_io_data;
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
 
-	req->bio = pdu->bio;
-	if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
-		pdu->nvme_status = -EINTR;
-	} else {
-		pdu->nvme_status = nvme_req(req)->status;
-		if (!pdu->nvme_status)
-			pdu->nvme_status = blk_status_to_errno(err);
-	}
-	pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64);
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		pdu->status = -EINTR;
+	else
+		pdu->status = nvme_req(req)->status;
+	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
 
 	/*
 	 * For iopoll, complete it directly.
 	 * Otherwise, move the completion to task work.
 	 */
-	if (blk_rq_is_poll(req)) {
-		WRITE_ONCE(ioucmd->cookie, NULL);
+	if (blk_rq_is_poll(req))
 		nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
-	} else {
+	else
 		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
-	}
 
 	return RQ_END_IO_FREE;
 }
 
-static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
-		blk_status_t err)
-{
-	struct io_uring_cmd *ioucmd = req->end_io_data;
-	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-
-	req->bio = pdu->bio;
-	pdu->req = req;
-
-	/*
-	 * For iopoll, complete it directly.
-	 * Otherwise, move the completion to task work.
-	 */
-	if (blk_rq_is_poll(req)) {
-		WRITE_ONCE(ioucmd->cookie, NULL);
-		nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
-	} else {
-		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
-	}
-
-	return RQ_END_IO_NONE;
-}
-
 static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
 {
@@ -568,7 +445,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct request *req;
 	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
 	blk_mq_req_flags_t blk_flags = 0;
-	void *meta = NULL;
 	int ret;
 
 	c.common.opcode = READ_ONCE(cmd->opcode);
@@ -616,27 +492,16 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	if (d.addr && d.data_len) {
 		ret = nvme_map_user_request(req, d.addr,
 			d.data_len, nvme_to_user_ptr(d.metadata),
-			d.metadata_len, 0, &meta, ioucmd, vec);
+			d.metadata_len, 0, ioucmd, vec);
 		if (ret)
 			return ret;
 	}
 
-	if (blk_rq_is_poll(req)) {
-		ioucmd->flags |= IORING_URING_CMD_POLLED;
-		WRITE_ONCE(ioucmd->cookie, req);
-	}
-
 	/* to free bio on completion, as req->bio will be null at that time */
 	pdu->bio = req->bio;
-	pdu->meta_len = d.metadata_len;
+	pdu->req = req;
 	req->end_io_data = ioucmd;
-	if (pdu->meta_len) {
-		pdu->u.meta = meta;
-		pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
-		req->end_io = nvme_uring_cmd_end_io_meta;
-	} else {
-		req->end_io = nvme_uring_cmd_end_io;
-	}
+	req->end_io = nvme_uring_cmd_end_io;
 	blk_execute_rq_nowait(req, false);
 	return -EIOCBQUEUED;
 }
@@ -787,16 +652,12 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 		struct io_comp_batch *iob,
 		unsigned int poll_flags)
 {
-	struct request *req;
-	int ret = 0;
-
-	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
-		return 0;
+	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+	struct request *req = pdu->req;
 
-	req = READ_ONCE(ioucmd->cookie);
 	if (req && blk_rq_is_poll(req))
-		ret = blk_rq_poll(req, iob, poll_flags);
-	return ret;
+		return blk_rq_poll(req, iob, poll_flags);
+	return 0;
 }
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0a88d7bdc5e3..2dd4137a08b2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -579,7 +579,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 	 */
 	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		rc = device_add_disk(&head->subsys->dev, head->disk,
-				     nvme_ns_id_attr_groups);
+				     nvme_ns_attr_groups);
 		if (rc) {
 			clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
 			return;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e7411dac00f7..4be7f6822966 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/t10-pi.h>
+#include <linux/ratelimit_types.h>
 
 #include <trace/events/block.h>
 
@@ -450,13 +451,27 @@ struct nvme_ns_head {
 	struct list_head	list;
 	struct srcu_struct	srcu;
 	struct nvme_subsystem	*subsys;
-	unsigned		ns_id;
 	struct nvme_ns_ids	ids;
 	struct list_head	entry;
 	struct kref		ref;
 	bool			shared;
 	int			instance;
 	struct nvme_effects_log *effects;
+	u64			nuse;
+	unsigned		ns_id;
+	int			lba_shift;
+	u16			ms;
+	u16			pi_size;
+	u8			pi_type;
+	u8			guard_type;
+	u16			sgs;
+	u32			sws;
+#ifdef CONFIG_BLK_DEV_ZONED
+	u64			zsze;
+#endif
+	unsigned long		features;
+
+	struct ratelimit_state	rs_nuse;
 
 	struct cdev		cdev;
 	struct device		cdev_device;
@@ -498,17 +513,6 @@ struct nvme_ns {
 	struct kref kref;
 	struct nvme_ns_head *head;
 
-	int lba_shift;
-	u16 ms;
-	u16 pi_size;
-	u16 sgs;
-	u32 sws;
-	u8 pi_type;
-	u8 guard_type;
-#ifdef CONFIG_BLK_DEV_ZONED
-	u64 zsze;
-#endif
-	unsigned long features;
 	unsigned long flags;
 #define NVME_NS_REMOVING	0
 #define NVME_NS_ANA_PENDING	2
@@ -523,9 +527,9 @@ struct nvme_ns {
 };
 
 /* NVMe ns supports metadata actions by the controller (generate/strip) */
-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+static inline bool nvme_ns_has_pi(struct nvme_ns_head *head)
 {
-	return ns->pi_type && ns->ms == ns->pi_size;
+	return head->pi_type && head->ms == head->pi_size;
 }
 
 struct nvme_ctrl_ops {
@@ -657,17 +661,17 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
 /*
  * Convert a 512B sector number to a device logical block number.
  */
-static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector)
 {
-	return sector >> (ns->lba_shift - SECTOR_SHIFT);
+	return sector >> (head->lba_shift - SECTOR_SHIFT);
 }
 
 /*
  * Convert a device logical block number to a 512B sector number.
  */
-static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
+static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba)
 {
-	return lba << (ns->lba_shift - SECTOR_SHIFT);
+	return lba << (head->lba_shift - SECTOR_SHIFT);
 }
 
 /*
@@ -873,10 +877,12 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
+int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
+		struct nvme_id_ns **id);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 
-extern const struct attribute_group *nvme_ns_id_attr_groups[];
+extern const struct attribute_group *nvme_ns_attr_groups[];
 extern const struct pr_ops nvme_pr_ops;
 extern const struct block_device_operations nvme_ns_head_ops;
 extern const struct attribute_group nvme_dev_attrs_group;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 81e2621169e5..c89503da24d7 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1423,7 +1423,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
 		goto mr_put;
 
 	nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
-				req->mr->sig_attrs, ns->pi_type);
+				req->mr->sig_attrs, ns->head->pi_type);
 	nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
 
 	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
@@ -2017,7 +2017,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	    queue->pi_support &&
 	    (c->common.opcode == nvme_cmd_write ||
 	     c->common.opcode == nvme_cmd_read) &&
-	    nvme_ns_has_pi(ns))
+	    nvme_ns_has_pi(ns->head))
 		req->use_sig_mr = true;
 	else
 		req->use_sig_mr = false;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index c6b7fbd4d34d..ac24ad102380 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -114,12 +114,97 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
-static struct attribute *nvme_ns_id_attrs[] = {
+static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ids.csi);
+}
+static DEVICE_ATTR_RO(csi);
+
+static ssize_t metadata_bytes_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ms);
+}
+static DEVICE_ATTR_RO(metadata_bytes);
+
+static int ns_head_update_nuse(struct nvme_ns_head *head)
+{
+	struct nvme_id_ns *id;
+	struct nvme_ns *ns;
+	int srcu_idx, ret = -EWOULDBLOCK;
+
+	/* Avoid issuing commands too often by rate limiting the update */
+	if (!__ratelimit(&head->rs_nuse))
+		return 0;
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (!ns)
+		goto out_unlock;
+
+	ret = nvme_identify_ns(ns->ctrl, head->ns_id, &id);
+	if (ret)
+		goto out_unlock;
+
+	head->nuse = le64_to_cpu(id->nuse);
+	kfree(id);
+
+out_unlock:
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return ret;
+}
+
+static int ns_update_nuse(struct nvme_ns *ns)
+{
+	struct nvme_id_ns *id;
+	int ret;
+
+	/* Avoid issuing commands too often by rate limiting the update. */
+	if (!__ratelimit(&ns->head->rs_nuse))
+		return 0;
+
+	ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
+	if (ret)
+		goto out_free_id;
+
+	ns->head->nuse = le64_to_cpu(id->nuse);
+
+out_free_id:
+	kfree(id);
+
+	return ret;
+}
+
+static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	struct gendisk *disk = dev_to_disk(dev);
+	struct block_device *bdev = disk->part0;
+	int ret;
+
+	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
+	    bdev->bd_disk->fops == &nvme_ns_head_ops)
+		ret = ns_head_update_nuse(head);
+	else
+		ret = ns_update_nuse(bdev->bd_disk->private_data);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%llu\n", head->nuse);
+}
+static DEVICE_ATTR_RO(nuse);
+
+static struct attribute *nvme_ns_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
+	&dev_attr_csi.attr,
 	&dev_attr_nsid.attr,
+	&dev_attr_metadata_bytes.attr,
+	&dev_attr_nuse.attr,
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,
@@ -127,7 +212,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
 	NULL,
 };
 
-static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
+static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -157,13 +242,13 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-static const struct attribute_group nvme_ns_id_attr_group = {
-	.attrs		= nvme_ns_id_attrs,
-	.is_visible	= nvme_ns_id_attrs_are_visible,
+static const struct attribute_group nvme_ns_attr_group = {
+	.attrs		= nvme_ns_attrs,
+	.is_visible	= nvme_ns_attrs_are_visible,
 };
 
-const struct attribute_group *nvme_ns_id_attr_groups[] = {
-	&nvme_ns_id_attr_group,
+const struct attribute_group *nvme_ns_attr_groups[] = {
+	&nvme_ns_attr_group,
 	NULL,
 };
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index ec8557810c21..499bbb0eee8d 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -11,7 +11,7 @@ int nvme_revalidate_zones(struct nvme_ns *ns)
 {
 	struct request_queue *q = ns->queue;
 
-	blk_queue_chunk_sectors(q, ns->zsze);
+	blk_queue_chunk_sectors(q, ns->head->zsze);
 	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
 
 	return blk_revalidate_disk_zones(ns->disk, NULL);
@@ -99,16 +99,17 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 		goto free_data;
 	}
 
-	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
-	if (!is_power_of_2(ns->zsze)) {
+	ns->head->zsze =
+		nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
+	if (!is_power_of_2(ns->head->zsze)) {
 		dev_warn(ns->ctrl->device,
 			"invalid zone size:%llu for namespace:%u\n",
-			ns->zsze, ns->head->ns_id);
+			ns->head->zsze, ns->head->ns_id);
 		status = -ENODEV;
 		goto free_data;
 	}
 
-	disk_set_zoned(ns->disk, BLK_ZONED_HM);
+	disk_set_zoned(ns->disk);
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
 	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
@@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
 			   sizeof(struct nvme_zone_descriptor);
 
 	nr_zones = min_t(unsigned int, nr_zones,
-			 get_capacity(ns->disk) >> ilog2(ns->zsze));
+			 get_capacity(ns->disk) >> ilog2(ns->head->zsze));
 
 	bufsize = sizeof(struct nvme_zone_report) +
 		nr_zones * sizeof(struct nvme_zone_descriptor);
@@ -147,7 +148,8 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
 	return NULL;
 }
 
-static int nvme_zone_parse_entry(struct nvme_ns *ns,
+static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
+				 struct nvme_ns_head *head,
 				 struct nvme_zone_descriptor *entry,
 				 unsigned int idx, report_zones_cb cb,
 				 void *data)
@@ -155,20 +157,20 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
 	struct blk_zone zone = { };
 
 	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
-		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
+		dev_err(ctrl->device, "invalid zone type %#x\n",
 				entry->zt);
 		return -EINVAL;
 	}
 
 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
 	zone.cond = entry->zs >> 4;
-	zone.len = ns->zsze;
-	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
-	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
+	zone.len = head->zsze;
+	zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap));
+	zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba));
 	if (zone.cond == BLK_ZONE_COND_FULL)
 		zone.wp = zone.start + zone.len;
 	else
-		zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
+		zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp));
 
 	return cb(&zone, idx, data);
 }
@@ -196,11 +198,11 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
 	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
 	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
 
-	sector &= ~(ns->zsze - 1);
+	sector &= ~(ns->head->zsze - 1);
 	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
 		memset(report, 0, buflen);
 
-		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
+		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
 		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
 		if (ret) {
 			if (ret > 0)
@@ -213,14 +215,15 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
 			break;
 
 		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
-			ret = nvme_zone_parse_entry(ns, &report->entries[i],
+			ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
+						    &report->entries[i],
 						    zone_idx, cb, data);
 			if (ret)
 				goto out_free;
 			zone_idx++;
 		}
 
-		sector += ns->zsze * nz;
+		sector += ns->head->zsze * nz;
 	}
 
 	if (zone_idx > 0)
@@ -239,7 +242,7 @@ blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
-	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
+	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
 	c->zms.zsa = action;
 
 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index d937fe05129e..2482a0db2504 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1276,7 +1276,7 @@ static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
 		return -EINVAL;
 
 	down_write(&nvmet_config_sem);
-	if (cntlid_min >= to_subsys(item)->cntlid_max)
+	if (cntlid_min > to_subsys(item)->cntlid_max)
 		goto out_unlock;
 	to_subsys(item)->cntlid_min = cntlid_min;
 	up_write(&nvmet_config_sem);
@@ -1306,7 +1306,7 @@ static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
 		return -EINVAL;
 
 	down_write(&nvmet_config_sem);
-	if (cntlid_max <= to_subsys(item)->cntlid_min)
+	if (cntlid_max < to_subsys(item)->cntlid_min)
 		goto out_unlock;
 	to_subsys(item)->cntlid_max = cntlid_max;
 	up_write(&nvmet_config_sem);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3935165048e7..d26aa30f8702 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1425,9 +1425,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!ctrl->sqs)
 		goto out_free_changed_ns_list;
 
-	if (subsys->cntlid_min > subsys->cntlid_max)
-		goto out_free_sqs;
-
 	ret = ida_alloc_range(&cntlid_ida,
 			     subsys->cntlid_min, subsys->cntlid_max,
 			     GFP_KERNEL);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 9fe07d7efa96..f2d963e1fe94 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -602,7 +602,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
 		goto out_put_file;
 	}
 
-	old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
+	old = xa_cmpxchg(&passthru_subsystems, ctrl->instance, NULL,
 			 subsys, GFP_KERNEL);
 	if (xa_is_err(old)) {
 		ret = xa_err(old);
@@ -635,7 +635,7 @@ out_unlock:
 static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
 {
 	if (subsys->passthru_ctrl) {
-		xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
+		xa_erase(&passthru_subsystems, subsys->passthru_ctrl->instance);
 		module_put(subsys->passthru_ctrl->ops->module);
 		nvme_put_ctrl(subsys->passthru_ctrl);
 	}
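
The nvmet configfs change at the tail of this diff relaxes the cntlid range validation: the stores now reject only min > max where they previously rejected min >= max as well, so cntlid_min == cntlid_max becomes a legal single-ID range, and the now-redundant check in nvmet_alloc_ctrl() is dropped. A hypothetical standalone predicate capturing the new rule:

    #include <stdbool.h>
    #include <stdio.h>

    /* min == max is now a valid (single controller ID) range. */
    static bool cntlid_range_valid(unsigned int cntlid_min,
                                   unsigned int cntlid_max)
    {
            return cntlid_min <= cntlid_max;
    }

    int main(void)
    {
            printf("%d\n", cntlid_range_valid(5, 5)); /* 1: accepted */
            printf("%d\n", cntlid_range_valid(6, 5)); /* 0: rejected */
            return 0;
    }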