diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 05:01:46 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-08-31 05:01:46 +0300 |
commit | 9a1d6c9e3f53732f2f48f4424e028642db616663 (patch) | |
tree | 5308c856a8b7a56b705cde940eb946ee88e1ca88 /drivers/nvme | |
parent | 679369114e55f422dc593d0628cfde1d04ae59b3 (diff) | |
parent | b5b0eba590f08e2b06c830b8343c1da7059c7a88 (diff) | |
download | linux-9a1d6c9e3f53732f2f48f4424e028642db616663.tar.xz |
Merge tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"Sitting on top of the core block changes, here are the driver changes
for the 5.15 merge window:
- NVMe updates via Christoph:
- suspend improvements for devices with an HMB (Keith Busch)
- handle double completions more gracefully (Sagi Grimberg)
- cleanup the selects for the nvme core code a bit (Sagi Grimberg)
- don't update queue count when failing to set io queues (Ruozhu Li)
- various nvmet connect fixes (Amit Engel)
- cleanup lightnvm leftovers (Keith Busch, me)
- small cleanups (Colin Ian King, Hou Pu)
- add tracing for the Set Features command (Hou Pu)
- CMB sysfs cleanups (Keith Busch)
- add a mutex_destroy call (Keith Busch)
- remove lightnvm subsystem. It's served its purpose and ultimately
led to zoned nvme support, we no longer need it (Christoph)
- revert floppy O_NDELAY fix (Denis)
- nbd fixes (Hou, Pavel, Baokun)
- nbd locking fixes (Tetsuo)
- nbd device removal fixes (Christoph)
- raid10 rcu warning fix (Xiao)
- raid1 write behind fix (Guoqing)
- rnbd fixes (Gioh, Md Haris)
- misc fixes (Colin)"
* tag 'for-5.15/drivers-2021-08-30' of git://git.kernel.dk/linux-block: (42 commits)
Revert "floppy: reintroduce O_NDELAY fix"
raid1: ensure write behind bio has less than BIO_MAX_VECS sectors
md/raid10: Remove unnecessary rcu_dereference in raid10_handle_discard
nbd: remove nbd->destroy_complete
nbd: only return usable devices from nbd_find_unused
nbd: set nbd->index before releasing nbd_index_mutex
nbd: prevent IDR lookups from finding partially initialized devices
nbd: reset NBD to NULL when restarting in nbd_genl_connect
nbd: add missing locking to the nbd_dev_add error path
nvme: remove the unused NVME_NS_* enum
nvme: remove nvm_ndev from ns
nvme: Have NVME_FABRICS select NVME_CORE instead of transport drivers
block: nbd: add sanity check for first_minor
nvmet: check that host sqsize does not exceed ctrl MQES
nvmet: avoid duplicate qid in connect cmd
nvmet: pass back cntlid on successful completion
nvme-rdma: don't update queue count when failing to set io queues
nvme-tcp: don't update queue count when failing to set io queues
nvme-tcp: pair send_mutex init with destroy
nvme: allow user toggling hmb usage
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Kconfig | 4 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 1 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 16 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 1 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 4 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 1274 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 79 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 187 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 8 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 44 | ||||
-rw-r--r-- | drivers/nvme/host/trace.c | 18 | ||||
-rw-r--r-- | drivers/nvme/target/Kconfig | 2 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 1 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 38 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/trace.c | 18 | ||||
-rw-r--r-- | drivers/nvme/target/zns.c | 5 |
17 files changed, 276 insertions, 1428 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index c3f3d77f1aac..dc0450ca23a3 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -33,12 +33,12 @@ config NVME_HWMON in the system. config NVME_FABRICS + select NVME_CORE tristate config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK - select NVME_CORE select NVME_FABRICS select SG_POOL help @@ -55,7 +55,6 @@ config NVME_FC tristate "NVM Express over Fabrics FC host driver" depends on BLOCK depends on HAS_DMA - select NVME_CORE select NVME_FABRICS select SG_POOL help @@ -72,7 +71,6 @@ config NVME_TCP tristate "NVM Express over Fabrics TCP host driver" depends on INET depends on BLOCK - select NVME_CORE select NVME_FABRICS select CRYPTO select CRYPTO_CRC32C diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index cbc509784b2e..dfaacd472e5d 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_NVME_TCP) += nvme-tcp.o nvme-core-y := core.o ioctl.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o -nvme-core-$(CONFIG_NVM) += lightnvm.o nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 68acd33c3856..8679a108f571 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -587,9 +587,6 @@ static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); - if (ns->ndev) - nvme_nvm_unregister(ns); - put_disk(ns->disk); nvme_put_ns_head(ns->head); nvme_put_ctrl(ns->ctrl); @@ -1028,7 +1025,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) return BLK_STS_IOERR; } - cmd->common.command_id = req->tag; + nvme_req(req)->genctr++; + cmd->common.command_id = nvme_cid(req); 
trace_nvme_setup_cmd(req, cmd); return ret; } @@ -3217,9 +3215,6 @@ static const struct attribute_group nvme_ns_id_attr_group = { const struct attribute_group *nvme_ns_id_attr_groups[] = { &nvme_ns_id_attr_group, -#ifdef CONFIG_NVM - &nvme_nvm_attr_group, -#endif NULL, }; @@ -3762,13 +3757,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (nvme_update_ns_info(ns, id)) goto out_unlink_ns; - if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - if (nvme_nvm_register(ns, disk->disk_name, node)) { - dev_warn(ctrl->device, "LightNVM init failure\n"); - goto out_unlink_ns; - } - } - down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); up_write(&ctrl->namespaces_rwsem); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index a5469fd9d4c3..668c6bb7a567 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -719,7 +719,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } - nvmf_host_put(opts->host); opts->host = nvmf_host_add(p); kfree(p); if (!opts->host) { diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 305ddd415e45..22314962842d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -342,9 +342,7 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, case NVME_IOCTL_IO64_CMD: return nvme_user_cmd64(ns->ctrl, ns, argp); default: - if (!ns->ndev) - return -ENOTTY; - return nvme_nvm_ioctl(ns, cmd, argp); + return -ENOTTY; } } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c deleted file mode 100644 index e9d9ad47f70f..000000000000 --- a/drivers/nvme/host/lightnvm.c +++ /dev/null @@ -1,1274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * nvme-lightnvm.c - LightNVM NVMe device - * - * Copyright (C) 2014-2015 IT University of Copenhagen - * Initial release: Matias Bjorling <mb@lightnvm.io> - */ - -#include "nvme.h" - -#include <linux/nvme.h> 
-#include <linux/bitops.h> -#include <linux/lightnvm.h> -#include <linux/vmalloc.h> -#include <linux/sched/sysctl.h> -#include <uapi/linux/lightnvm.h> - -enum nvme_nvm_admin_opcode { - nvme_nvm_admin_identity = 0xe2, - nvme_nvm_admin_get_bb_tbl = 0xf2, - nvme_nvm_admin_set_bb_tbl = 0xf1, -}; - -enum nvme_nvm_log_page { - NVME_NVM_LOG_REPORT_CHUNK = 0xca, -}; - -struct nvme_nvm_ph_rw { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_erase_blk { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - -struct nvme_nvm_identity { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __u32 rsvd11[6]; -}; - -struct nvme_nvm_getbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __u32 rsvd4[4]; -}; - -struct nvme_nvm_setbbtbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 nlb; - __u8 value; - __u8 rsvd3; - __u32 rsvd4[3]; -}; - -struct nvme_nvm_command { - union { - struct nvme_common_command common; - struct nvme_nvm_ph_rw ph_rw; - struct nvme_nvm_erase_blk erase; - struct nvme_nvm_identity identity; - struct nvme_nvm_getbbtbl get_bb; - struct nvme_nvm_setbbtbl set_bb; - }; -}; - -struct nvme_nvm_id12_grp { - __u8 mtype; - __u8 fmtype; - __le16 res16; - __u8 num_ch; - __u8 num_lun; - __u8 num_pln; - __u8 rsvd1; - __le16 num_chk; - __le16 num_pg; - __le16 fpg_sz; - __le16 csecs; - __le16 sos; - __le16 rsvd2; - __le32 trdt; - __le32 trdm; - __le32 tprt; - __le32 tprm; - __le32 tbet; - __le32 tbem; - __le32 mpos; - __le32 
mccap; - __le16 cpar; - __u8 reserved[906]; -} __packed; - -struct nvme_nvm_id12_addrf { - __u8 ch_offset; - __u8 ch_len; - __u8 lun_offset; - __u8 lun_len; - __u8 pln_offset; - __u8 pln_len; - __u8 blk_offset; - __u8 blk_len; - __u8 pg_offset; - __u8 pg_len; - __u8 sec_offset; - __u8 sec_len; - __u8 res[4]; -} __packed; - -struct nvme_nvm_id12 { - __u8 ver_id; - __u8 vmnt; - __u8 cgrps; - __u8 res; - __le32 cap; - __le32 dom; - struct nvme_nvm_id12_addrf ppaf; - __u8 resv[228]; - struct nvme_nvm_id12_grp grp; - __u8 resv2[2880]; -} __packed; - -struct nvme_nvm_bb_tbl { - __u8 tblid[4]; - __le16 verid; - __le16 revid; - __le32 rvsd1; - __le32 tblks; - __le32 tfact; - __le32 tgrown; - __le32 tdresv; - __le32 thresv; - __le32 rsvd2[8]; - __u8 blk[]; -}; - -struct nvme_nvm_id20_addrf { - __u8 grp_len; - __u8 pu_len; - __u8 chk_len; - __u8 lba_len; - __u8 resv[4]; -}; - -struct nvme_nvm_id20 { - __u8 mjr; - __u8 mnr; - __u8 resv[6]; - - struct nvme_nvm_id20_addrf lbaf; - - __le32 mccap; - __u8 resv2[12]; - - __u8 wit; - __u8 resv3[31]; - - /* Geometry */ - __le16 num_grp; - __le16 num_pu; - __le32 num_chk; - __le32 clba; - __u8 resv4[52]; - - /* Write data requirements */ - __le32 ws_min; - __le32 ws_opt; - __le32 mw_cunits; - __le32 maxoc; - __le32 maxocpu; - __u8 resv5[44]; - - /* Performance related metrics */ - __le32 trdt; - __le32 trdm; - __le32 twrt; - __le32 twrm; - __le32 tcrst; - __le32 tcrsm; - __u8 resv6[40]; - - /* Reserved area */ - __u8 resv7[2816]; - - /* Vendor specific */ - __u8 vs[1024]; -}; - -struct nvme_nvm_chk_meta { - __u8 state; - __u8 type; - __u8 wi; - __u8 rsvd[5]; - __le64 slba; - __le64 cnlb; - __le64 wp; -}; - -/* - * Check we didn't inadvertently grow the command struct - */ -static inline void _nvme_nvm_check_size(void) -{ - BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 
64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32); - BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != - sizeof(struct nvm_chk_meta)); -} - -static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst, - struct nvme_nvm_id12_addrf *src) -{ - dst->ch_len = src->ch_len; - dst->lun_len = src->lun_len; - dst->blk_len = src->blk_len; - dst->pg_len = src->pg_len; - dst->pln_len = src->pln_len; - dst->sec_len = src->sec_len; - - dst->ch_offset = src->ch_offset; - dst->lun_offset = src->lun_offset; - dst->blk_offset = src->blk_offset; - dst->pg_offset = src->pg_offset; - dst->pln_offset = src->pln_offset; - dst->sec_offset = src->sec_offset; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; - dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; - dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id, - struct nvm_geo *geo) -{ - struct nvme_nvm_id12_grp *src; - int sec_per_pg, sec_per_pl, pg_per_blk; - - if (id->cgrps != 1) - return -EINVAL; - - src = &id->grp; - - if (src->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } - - /* 1.2 spec. 
only reports a single version id - unfold */ - geo->major_ver_id = id->ver_id; - geo->minor_ver_id = 2; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_12; - - geo->num_ch = src->num_ch; - geo->num_lun = src->num_lun; - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le16_to_cpu(src->num_chk); - - geo->csecs = le16_to_cpu(src->csecs); - geo->sos = le16_to_cpu(src->sos); - - pg_per_blk = le16_to_cpu(src->num_pg); - sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs; - sec_per_pl = sec_per_pg * src->num_pln; - geo->clba = sec_per_pl * pg_per_blk; - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = sec_per_pg; - geo->ws_opt = sec_per_pg; - geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */ - - /* Do not impose values for maximum number of open blocks as it is - * unspecified in 1.2. Users of 1.2 must be aware of this and eventually - * specify these values through a quirk if restrictions apply. 
- */ - geo->maxoc = geo->all_luns * geo->num_chk; - geo->maxocpu = geo->num_chk; - - geo->mccap = le32_to_cpu(src->mccap); - - geo->trdt = le32_to_cpu(src->trdt); - geo->trdm = le32_to_cpu(src->trdm); - geo->tprt = le32_to_cpu(src->tprt); - geo->tprm = le32_to_cpu(src->tprm); - geo->tbet = le32_to_cpu(src->tbet); - geo->tbem = le32_to_cpu(src->tbem); - - /* 1.2 compatibility */ - geo->vmnt = id->vmnt; - geo->cap = le32_to_cpu(id->cap); - geo->dom = le32_to_cpu(id->dom); - - geo->mtype = src->mtype; - geo->fmtype = src->fmtype; - - geo->cpar = le16_to_cpu(src->cpar); - geo->mpos = le32_to_cpu(src->mpos); - - geo->pln_mode = NVM_PLANE_SINGLE; - - if (geo->mpos & 0x020202) { - geo->pln_mode = NVM_PLANE_DOUBLE; - geo->ws_opt <<= 1; - } else if (geo->mpos & 0x040404) { - geo->pln_mode = NVM_PLANE_QUAD; - geo->ws_opt <<= 2; - } - - geo->num_pln = src->num_pln; - geo->num_pg = le16_to_cpu(src->num_pg); - geo->fpg_sz = le16_to_cpu(src->fpg_sz); - - nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf); - - return 0; -} - -static void nvme_nvm_set_addr_20(struct nvm_addrf *dst, - struct nvme_nvm_id20_addrf *src) -{ - dst->ch_len = src->grp_len; - dst->lun_len = src->pu_len; - dst->chk_len = src->chk_len; - dst->sec_len = src->lba_len; - - dst->sec_offset = 0; - dst->chk_offset = dst->sec_len; - dst->lun_offset = dst->chk_offset + dst->chk_len; - dst->ch_offset = dst->lun_offset + dst->lun_len; - - dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; - dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; - dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset; - dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; -} - -static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id, - struct nvm_geo *geo) -{ - geo->major_ver_id = id->mjr; - geo->minor_ver_id = id->mnr; - - /* Set compacted version for upper layers */ - geo->version = NVM_OCSSD_SPEC_20; - - geo->num_ch = le16_to_cpu(id->num_grp); - geo->num_lun = 
le16_to_cpu(id->num_pu); - geo->all_luns = geo->num_ch * geo->num_lun; - - geo->num_chk = le32_to_cpu(id->num_chk); - geo->clba = le32_to_cpu(id->clba); - - geo->all_chunks = geo->all_luns * geo->num_chk; - geo->total_secs = geo->clba * geo->all_chunks; - - geo->ws_min = le32_to_cpu(id->ws_min); - geo->ws_opt = le32_to_cpu(id->ws_opt); - geo->mw_cunits = le32_to_cpu(id->mw_cunits); - geo->maxoc = le32_to_cpu(id->maxoc); - geo->maxocpu = le32_to_cpu(id->maxocpu); - - geo->trdt = le32_to_cpu(id->trdt); - geo->trdm = le32_to_cpu(id->trdm); - geo->tprt = le32_to_cpu(id->twrt); - geo->tprm = le32_to_cpu(id->twrm); - geo->tbet = le32_to_cpu(id->tcrst); - geo->tbem = le32_to_cpu(id->tcrsm); - - nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf); - - return 0; -} - -static int nvme_nvm_identity(struct nvm_dev *nvmdev) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_id12 *id; - struct nvme_nvm_command c = {}; - int ret; - - c.identity.opcode = nvme_nvm_admin_identity; - c.identity.nsid = cpu_to_le32(ns->head->ns_id); - - id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL); - if (!id) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - id, sizeof(struct nvme_nvm_id12)); - if (ret) { - ret = -EIO; - goto out; - } - - /* - * The 1.2 and 2.0 specifications share the first byte in their geometry - * command to make it possible to know what version a device implements. 
- */ - switch (id->ver_id) { - case 1: - ret = nvme_nvm_setup_12(id, &nvmdev->geo); - break; - case 2: - ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id, - &nvmdev->geo); - break; - default: - dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n", - id->ver_id); - ret = -EINVAL; - } - -out: - kfree(id); - return ret; -} - -static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, - u8 *blks) -{ - struct request_queue *q = nvmdev->q; - struct nvm_geo *geo = &nvmdev->geo; - struct nvme_ns *ns = q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_command c = {}; - struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->num_chk * geo->num_pln; - int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; - int ret = 0; - - c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl; - c.get_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.get_bb.spba = cpu_to_le64(ppa.ppa); - - bb_tbl = kzalloc(tblsz, GFP_KERNEL); - if (!bb_tbl) - return -ENOMEM; - - ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, - bb_tbl, tblsz); - if (ret) { - dev_err(ctrl->device, "get bad block table failed (%d)\n", ret); - ret = -EIO; - goto out; - } - - if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || - bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { - dev_err(ctrl->device, "bbt format mismatch\n"); - ret = -EINVAL; - goto out; - } - - if (le16_to_cpu(bb_tbl->verid) != 1) { - ret = -EINVAL; - dev_err(ctrl->device, "bbt version not supported\n"); - goto out; - } - - if (le32_to_cpu(bb_tbl->tblks) != nr_blks) { - ret = -EINVAL; - dev_err(ctrl->device, - "bbt unsuspected blocks returned (%u!=%u)", - le32_to_cpu(bb_tbl->tblks), nr_blks); - goto out; - } - - memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln); -out: - kfree(bb_tbl); - return ret; -} - -static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, - int nr_ppas, int type) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_command c = {}; - int ret 
= 0; - - c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl; - c.set_bb.nsid = cpu_to_le32(ns->head->ns_id); - c.set_bb.spba = cpu_to_le64(ppas->ppa); - c.set_bb.nlb = cpu_to_le16(nr_ppas - 1); - c.set_bb.value = type; - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - NULL, 0); - if (ret) - dev_err(ns->ctrl->device, "set bad block table failed (%d)\n", - ret); - return ret; -} - -/* - * Expect the lba in device format - */ -static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, - sector_t slba, int nchks, - struct nvm_chk_meta *meta) -{ - struct nvm_geo *geo = &ndev->geo; - struct nvme_ns *ns = ndev->q->queuedata; - struct nvme_ctrl *ctrl = ns->ctrl; - struct nvme_nvm_chk_meta *dev_meta, *dev_meta_off; - struct ppa_addr ppa; - size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); - size_t log_pos, offset, len; - int i, max_len; - int ret = 0; - - /* - * limit requests to maximum 256K to avoid issuing arbitrary large - * requests when the device does not specific a maximum transfer size. 
- */ - max_len = min_t(unsigned int, ctrl->max_hw_sectors << 9, 256 * 1024); - - dev_meta = kmalloc(max_len, GFP_KERNEL); - if (!dev_meta) - return -ENOMEM; - - /* Normalize lba address space to obtain log offset */ - ppa.ppa = slba; - ppa = dev_to_generic_addr(ndev, ppa); - - log_pos = ppa.m.chk; - log_pos += ppa.m.pu * geo->num_chk; - log_pos += ppa.m.grp * geo->num_lun * geo->num_chk; - - offset = log_pos * sizeof(struct nvme_nvm_chk_meta); - - while (left) { - len = min_t(unsigned int, left, max_len); - - memset(dev_meta, 0, max_len); - dev_meta_off = dev_meta; - - ret = nvme_get_log(ctrl, ns->head->ns_id, - NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM, - dev_meta, len, offset); - if (ret) { - dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); - break; - } - - for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { - meta->state = dev_meta_off->state; - meta->type = dev_meta_off->type; - meta->wi = dev_meta_off->wi; - meta->slba = le64_to_cpu(dev_meta_off->slba); - meta->cnlb = le64_to_cpu(dev_meta_off->cnlb); - meta->wp = le64_to_cpu(dev_meta_off->wp); - - meta++; - dev_meta_off++; - } - - offset += len; - left -= len; - } - - kfree(dev_meta); - - return ret; -} - -static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, - struct nvme_nvm_command *c) -{ - c->ph_rw.opcode = rqd->opcode; - c->ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c->ph_rw.spba = cpu_to_le64(rqd->ppa_addr.ppa); - c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); - c->ph_rw.control = cpu_to_le16(rqd->flags); - c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); -} - -static void nvme_nvm_end_io(struct request *rq, blk_status_t status) -{ - struct nvm_rq *rqd = rq->end_io_data; - - rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); - rqd->error = nvme_req(rq)->status; - nvm_end_io(rqd); - - kfree(nvme_req(rq)->cmd); - blk_mq_free_request(rq); -} - -static struct request *nvme_nvm_alloc_request(struct request_queue *q, - struct nvm_rq *rqd, - struct 
nvme_nvm_command *cmd) -{ - struct nvme_ns *ns = q->queuedata; - struct request *rq; - - nvme_nvm_rqtocmd(rqd, ns, cmd); - - rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0); - if (IS_ERR(rq)) - return rq; - - rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - - if (rqd->bio) - blk_rq_append_bio(rq, rqd->bio); - else - rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); - - return rq; -} - -static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd, - void *buf) -{ - struct nvm_geo *geo = &dev->geo; - struct request_queue *q = dev->q; - struct nvme_nvm_command *cmd; - struct request *rq; - int ret; - - cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - rq = nvme_nvm_alloc_request(q, rqd, cmd); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto err_free_cmd; - } - - if (buf) { - ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas, - GFP_KERNEL); - if (ret) - goto err_free_cmd; - } - - rq->end_io_data = rqd; - - blk_execute_rq_nowait(NULL, rq, 0, nvme_nvm_end_io); - - return 0; - -err_free_cmd: - kfree(cmd); - return ret; -} - -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name, - int size) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - - return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0); -} - -static void nvme_nvm_destroy_dma_pool(void *pool) -{ - struct dma_pool *dma_pool = pool; - - dma_pool_destroy(dma_pool); -} - -static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, - gfp_t mem_flags, dma_addr_t *dma_handler) -{ - return dma_pool_alloc(pool, mem_flags, dma_handler); -} - -static void nvme_nvm_dev_dma_free(void *pool, void *addr, - dma_addr_t dma_handler) -{ - dma_pool_free(pool, addr, dma_handler); -} - -static struct nvm_dev_ops nvme_nvm_dev_ops = { - .identity = nvme_nvm_identity, - - .get_bb_tbl = nvme_nvm_get_bb_tbl, - .set_bb_tbl = nvme_nvm_set_bb_tbl, - - .get_chk_meta = nvme_nvm_get_chk_meta, - - .submit_io = nvme_nvm_submit_io, - - 
.create_dma_pool = nvme_nvm_create_dma_pool, - .destroy_dma_pool = nvme_nvm_destroy_dma_pool, - .dev_dma_alloc = nvme_nvm_dev_dma_alloc, - .dev_dma_free = nvme_nvm_dev_dma_free, -}; - -static int nvme_nvm_submit_user_cmd(struct request_queue *q, - struct nvme_ns *ns, - struct nvme_nvm_command *vcmd, - void __user *ubuf, unsigned int bufflen, - void __user *meta_buf, unsigned int meta_len, - void __user *ppa_buf, unsigned int ppa_len, - u32 *result, u64 *status, unsigned int timeout) -{ - bool write = nvme_is_write((struct nvme_command *)vcmd); - struct nvm_dev *dev = ns->ndev; - struct request *rq; - struct bio *bio = NULL; - __le64 *ppa_list = NULL; - dma_addr_t ppa_dma; - __le64 *metadata = NULL; - dma_addr_t metadata_dma; - DECLARE_COMPLETION_ONSTACK(wait); - int ret = 0; - - rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0); - if (IS_ERR(rq)) { - ret = -ENOMEM; - goto err_cmd; - } - - if (timeout) - rq->timeout = timeout; - - if (ppa_buf && ppa_len) { - ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); - if (!ppa_list) { - ret = -ENOMEM; - goto err_rq; - } - if (copy_from_user(ppa_list, (void __user *)ppa_buf, - sizeof(u64) * (ppa_len + 1))) { - ret = -EFAULT; - goto err_ppa; - } - vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); - } else { - vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); - } - - if (ubuf && bufflen) { - ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); - if (ret) - goto err_ppa; - bio = rq->bio; - - if (meta_buf && meta_len) { - metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, - &metadata_dma); - if (!metadata) { - ret = -ENOMEM; - goto err_map; - } - - if (write) { - if (copy_from_user(metadata, - (void __user *)meta_buf, - meta_len)) { - ret = -EFAULT; - goto err_meta; - } - } - vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); - } - - bio_set_dev(bio, ns->disk->part0); - } - - blk_execute_rq(NULL, rq, 0); - - if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) - ret = -EINTR; - else if (nvme_req(rq)->status & 
0x7ff) - ret = -EIO; - if (result) - *result = nvme_req(rq)->status & 0x7ff; - if (status) - *status = le64_to_cpu(nvme_req(rq)->result.u64); - - if (metadata && !ret && !write) { - if (copy_to_user(meta_buf, (void *)metadata, meta_len)) - ret = -EFAULT; - } -err_meta: - if (meta_buf && meta_len) - dma_pool_free(dev->dma_pool, metadata, metadata_dma); -err_map: - if (bio) - blk_rq_unmap_user(bio); -err_ppa: - if (ppa_buf && ppa_len) - dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); -err_rq: - blk_mq_free_request(rq); -err_cmd: - return ret; -} - -static int nvme_nvm_submit_vio(struct nvme_ns *ns, - struct nvm_user_vio __user *uvio) -{ - struct nvm_user_vio vio; - struct nvme_nvm_command c; - unsigned int length; - int ret; - - if (copy_from_user(&vio, uvio, sizeof(vio))) - return -EFAULT; - if (vio.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.ph_rw.opcode = vio.opcode; - c.ph_rw.nsid = cpu_to_le32(ns->head->ns_id); - c.ph_rw.control = cpu_to_le16(vio.control); - c.ph_rw.length = cpu_to_le16(vio.nppas); - - length = (vio.nppas + 1) << ns->lba_shift; - - ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c, - (void __user *)(uintptr_t)vio.addr, length, - (void __user *)(uintptr_t)vio.metadata, - vio.metadata_len, - (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, - &vio.result, &vio.status, 0); - - if (ret && copy_to_user(uvio, &vio, sizeof(vio))) - return -EFAULT; - - return ret; -} - -static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, - struct nvm_passthru_vio __user *uvcmd) -{ - struct nvm_passthru_vio vcmd; - struct nvme_nvm_command c; - struct request_queue *q; - unsigned int timeout = 0; - int ret; - - if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) - return -EFAULT; - if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) - return -EACCES; - if (vcmd.flags) - return -EINVAL; - - memset(&c, 0, sizeof(c)); - c.common.opcode = vcmd.opcode; - c.common.nsid = cpu_to_le32(ns->head->ns_id); - c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); - 
c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); - /* cdw11-12 */ - c.ph_rw.length = cpu_to_le16(vcmd.nppas); - c.ph_rw.control = cpu_to_le16(vcmd.control); - c.common.cdw13 = cpu_to_le32(vcmd.cdw13); - c.common.cdw14 = cpu_to_le32(vcmd.cdw14); - c.common.cdw15 = cpu_to_le32(vcmd.cdw15); - - if (vcmd.timeout_ms) - timeout = msecs_to_jiffies(vcmd.timeout_ms); - - q = admin ? ns->ctrl->admin_q : ns->queue; - - ret = nvme_nvm_submit_user_cmd(q, ns, - (struct nvme_nvm_command *)&c, - (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, - (void __user *)(uintptr_t)vcmd.metadata, - vcmd.metadata_len, - (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, - &vcmd.result, &vcmd.status, timeout); - - if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) - return -EFAULT; - - return ret; -} - -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp) -{ - switch (cmd) { - case NVME_NVM_IOCTL_ADMIN_VIO: - return nvme_nvm_user_vcmd(ns, 1, argp); - case NVME_NVM_IOCTL_IO_VIO: - return nvme_nvm_user_vcmd(ns, 0, argp); - case NVME_NVM_IOCTL_SUBMIT_VIO: - return nvme_nvm_submit_vio(ns, argp); - default: - return -ENOTTY; - } -} - -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) -{ - struct request_queue *q = ns->queue; - struct nvm_dev *dev; - struct nvm_geo *geo; - - _nvme_nvm_check_size(); - - dev = nvm_alloc_dev(node); - if (!dev) - return -ENOMEM; - - /* Note that csecs and sos will be overridden if it is a 1.2 drive. 
*/ - geo = &dev->geo; - geo->csecs = 1 << ns->lba_shift; - geo->sos = ns->ms; - if (ns->features & NVME_NS_EXT_LBAS) - geo->ext = true; - else - geo->ext = false; - geo->mdts = ns->ctrl->max_hw_sectors; - - dev->q = q; - memcpy(dev->name, disk_name, DISK_NAME_LEN); - dev->ops = &nvme_nvm_dev_ops; - dev->private_data = ns; - ns->ndev = dev; - - return nvm_register(dev); -} - -void nvme_nvm_unregister(struct nvme_ns *ns) -{ - nvm_unregister(ns->ndev); -} - -static ssize_t nvm_dev_attr_show(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "version") == 0) { - if (geo->major_ver_id == 1) - return scnprintf(page, PAGE_SIZE, "%u\n", - geo->major_ver_id); - else - return scnprintf(page, PAGE_SIZE, "%u.%u\n", - geo->major_ver_id, - geo->minor_ver_id); - } else if (strcmp(attr->name, "capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap); - } else if (strcmp(attr->name, "read_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt); - } else if (strcmp(attr->name, "read_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm); - } else { - return scnprintf(page, - PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page) -{ - return scnprintf(page, PAGE_SIZE, - "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - ppaf->ch_offset, ppaf->ch_len, - ppaf->lun_offset, ppaf->lun_len, - ppaf->pln_offset, ppaf->pln_len, - ppaf->blk_offset, ppaf->blk_len, - ppaf->pg_offset, ppaf->pg_len, - ppaf->sec_offset, ppaf->sec_len); -} - -static ssize_t nvm_dev_attr_show_12(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct 
nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "vendor_opcode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt); - } else if (strcmp(attr->name, "device_mode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom); - /* kept for compatibility */ - } else if (strcmp(attr->name, "media_manager") == 0) { - return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); - } else if (strcmp(attr->name, "ppa_format") == 0) { - return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page); - } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype); - } else if (strcmp(attr->name, "flash_media_type") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype); - } else if (strcmp(attr->name, "num_channels") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "num_luns") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "num_planes") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln); - } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "num_pages") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg); - } else if (strcmp(attr->name, "page_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz); - } else if (strcmp(attr->name, "hw_sector_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs); - } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ - return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos); - } else if (strcmp(attr->name, "prog_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "prog_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else 
if (strcmp(attr->name, "erase_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "erase_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else if (strcmp(attr->name, "multiplane_modes") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos); - } else if (strcmp(attr->name, "media_capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap); - } else if (strcmp(attr->name, "max_phys_secs") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -static ssize_t nvm_dev_attr_show_20(struct device *dev, - struct device_attribute *dattr, char *page) -{ - struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; - struct attribute *attr; - - if (!ndev) - return 0; - - attr = &dattr->attr; - - if (strcmp(attr->name, "groups") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); - } else if (strcmp(attr->name, "punits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); - } else if (strcmp(attr->name, "chunks") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); - } else if (strcmp(attr->name, "clba") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba); - } else if (strcmp(attr->name, "ws_min") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min); - } else if (strcmp(attr->name, "ws_opt") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt); - } else if (strcmp(attr->name, "maxoc") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc); - } else if (strcmp(attr->name, "maxocpu") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu); - } else if (strcmp(attr->name, "mw_cunits") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits); - } else if (strcmp(attr->name, 
"write_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); - } else if (strcmp(attr->name, "write_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); - } else if (strcmp(attr->name, "reset_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); - } else if (strcmp(attr->name, "reset_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); - } else { - return scnprintf(page, PAGE_SIZE, - "Unhandled attr(%s) in `%s`\n", - attr->name, __func__); - } -} - -#define NVM_DEV_ATTR_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) -#define NVM_DEV_ATTR_12_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL) -#define NVM_DEV_ATTR_20_RO(_name) \ - DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL) - -/* general attributes */ -static NVM_DEV_ATTR_RO(version); -static NVM_DEV_ATTR_RO(capabilities); - -static NVM_DEV_ATTR_RO(read_typ); -static NVM_DEV_ATTR_RO(read_max); - -/* 1.2 values */ -static NVM_DEV_ATTR_12_RO(vendor_opcode); -static NVM_DEV_ATTR_12_RO(device_mode); -static NVM_DEV_ATTR_12_RO(ppa_format); -static NVM_DEV_ATTR_12_RO(media_manager); -static NVM_DEV_ATTR_12_RO(media_type); -static NVM_DEV_ATTR_12_RO(flash_media_type); -static NVM_DEV_ATTR_12_RO(num_channels); -static NVM_DEV_ATTR_12_RO(num_luns); -static NVM_DEV_ATTR_12_RO(num_planes); -static NVM_DEV_ATTR_12_RO(num_blocks); -static NVM_DEV_ATTR_12_RO(num_pages); -static NVM_DEV_ATTR_12_RO(page_size); -static NVM_DEV_ATTR_12_RO(hw_sector_size); -static NVM_DEV_ATTR_12_RO(oob_sector_size); -static NVM_DEV_ATTR_12_RO(prog_typ); -static NVM_DEV_ATTR_12_RO(prog_max); -static NVM_DEV_ATTR_12_RO(erase_typ); -static NVM_DEV_ATTR_12_RO(erase_max); -static NVM_DEV_ATTR_12_RO(multiplane_modes); -static NVM_DEV_ATTR_12_RO(media_capabilities); -static NVM_DEV_ATTR_12_RO(max_phys_secs); - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static 
NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs[] = { - /* version agnostic attrs */ - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, - - /* 1.2 attrs */ - &dev_attr_vendor_opcode.attr, - &dev_attr_device_mode.attr, - &dev_attr_media_manager.attr, - &dev_attr_ppa_format.attr, - &dev_attr_media_type.attr, - &dev_attr_flash_media_type.attr, - &dev_attr_num_channels.attr, - &dev_attr_num_luns.attr, - &dev_attr_num_planes.attr, - &dev_attr_num_blocks.attr, - &dev_attr_num_pages.attr, - &dev_attr_page_size.attr, - &dev_attr_hw_sector_size.attr, - &dev_attr_oob_sector_size.attr, - &dev_attr_prog_typ.attr, - &dev_attr_prog_max.attr, - &dev_attr_erase_typ.attr, - &dev_attr_erase_max.attr, - &dev_attr_multiplane_modes.attr, - &dev_attr_media_capabilities.attr, - &dev_attr_max_phys_secs.attr, - - /* 2.0 attrs */ - &dev_attr_groups.attr, - &dev_attr_punits.attr, - &dev_attr_chunks.attr, - &dev_attr_clba.attr, - &dev_attr_ws_min.attr, - &dev_attr_ws_opt.attr, - &dev_attr_maxoc.attr, - &dev_attr_maxocpu.attr, - &dev_attr_mw_cunits.attr, - - &dev_attr_write_typ.attr, - &dev_attr_write_max.attr, - &dev_attr_reset_typ.attr, - &dev_attr_reset_max.attr, - - NULL, -}; - -static umode_t nvm_dev_attrs_visible(struct kobject *kobj, - struct attribute *attr, int index) -{ - struct device *dev = kobj_to_dev(kobj); - struct gendisk *disk = dev_to_disk(dev); - struct nvme_ns *ns = disk->private_data; - struct nvm_dev *ndev = ns->ndev; - struct device_attribute *dev_attr = - container_of(attr, typeof(*dev_attr), attr); - - if (!ndev) - return 0; - - if (dev_attr->show == 
nvm_dev_attr_show) - return attr->mode; - - switch (ndev->geo.major_ver_id) { - case 1: - if (dev_attr->show == nvm_dev_attr_show_12) - return attr->mode; - break; - case 2: - if (dev_attr->show == nvm_dev_attr_show_20) - return attr->mode; - break; - } - - return 0; -} - -const struct attribute_group nvme_nvm_attr_group = { - .name = "lightnvm", - .attrs = nvm_dev_attrs, - .is_visible = nvm_dev_attrs_visible, -}; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5cd1fa3b8464..a2e1f298b217 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -11,7 +11,6 @@ #include <linux/pci.h> #include <linux/kref.h> #include <linux/blk-mq.h> -#include <linux/lightnvm.h> #include <linux/sed-opal.h> #include <linux/fault-inject.h> #include <linux/rcupdate.h> @@ -48,11 +47,6 @@ extern struct workqueue_struct *nvme_wq; extern struct workqueue_struct *nvme_reset_wq; extern struct workqueue_struct *nvme_delete_wq; -enum { - NVME_NS_LBA = 0, - NVME_NS_LIGHTNVM = 1, -}; - /* * List of workarounds for devices that required behavior not specified in * the standard. @@ -93,11 +87,6 @@ enum nvme_quirks { NVME_QUIRK_NO_DEEPEST_PS = (1 << 5), /* - * Supports the LighNVM command set if indicated in vs[1]. 
- */ - NVME_QUIRK_LIGHTNVM = (1 << 6), - - /* * Set MEDIUM priority on SQ creation */ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), @@ -158,6 +147,7 @@ enum nvme_quirks { struct nvme_request { struct nvme_command *cmd; union nvme_result result; + u8 genctr; u8 retries; u8 flags; u16 status; @@ -449,7 +439,6 @@ struct nvme_ns { u32 ana_grpid; #endif struct list_head siblings; - struct nvm_dev *ndev; struct kref kref; struct nvme_ns_head *head; @@ -497,6 +486,49 @@ struct nvme_ctrl_ops { int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); }; +/* + * nvme command_id is constructed as such: + * | xxxx | xxxxxxxxxxxx | + * gen request tag + */ +#define nvme_genctr_mask(gen) (gen & 0xf) +#define nvme_cid_install_genctr(gen) (nvme_genctr_mask(gen) << 12) +#define nvme_genctr_from_cid(cid) ((cid & 0xf000) >> 12) +#define nvme_tag_from_cid(cid) (cid & 0xfff) + +static inline u16 nvme_cid(struct request *rq) +{ + return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag; +} + +static inline struct request *nvme_find_rq(struct blk_mq_tags *tags, + u16 command_id) +{ + u8 genctr = nvme_genctr_from_cid(command_id); + u16 tag = nvme_tag_from_cid(command_id); + struct request *rq; + + rq = blk_mq_tag_to_rq(tags, tag); + if (unlikely(!rq)) { + pr_err("could not locate request for tag %#x\n", + tag); + return NULL; + } + if (unlikely(nvme_genctr_mask(nvme_req(rq)->genctr) != genctr)) { + dev_err(nvme_req(rq)->ctrl->device, + "request %#x genctr mismatch (got %#x expected %#x)\n", + tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr)); + return NULL; + } + return rq; +} + +static inline struct request *nvme_cid_to_rq(struct blk_mq_tags *tags, + u16 command_id) +{ + return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id)); +} + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, const char *dev_name); @@ -594,7 +626,8 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) static inline bool nvme_is_aen_req(u16 
qid, __u16 command_id) { - return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH; + return !qid && + nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH; } void nvme_complete_rq(struct request *req); @@ -823,26 +856,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif -#ifdef CONFIG_NVM -int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); -void nvme_nvm_unregister(struct nvme_ns *ns); -extern const struct attribute_group nvme_nvm_attr_group; -int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp); -#else -static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, - int node) -{ - return 0; -} - -static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp) -{ - return -ENOTTY; -} -#endif /* CONFIG_NVM */ - static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 51852085239e..b82492cd7503 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -60,6 +60,8 @@ MODULE_PARM_DESC(sgl_threshold, "Use SGLs when average request segment size is larger or equal to " "this size. 
Use 0 to disable SGLs."); +#define NVME_PCI_MIN_QUEUE_SIZE 2 +#define NVME_PCI_MAX_QUEUE_SIZE 4095 static int io_queue_depth_set(const char *val, const struct kernel_param *kp); static const struct kernel_param_ops io_queue_depth_ops = { .set = io_queue_depth_set, @@ -68,7 +70,7 @@ static const struct kernel_param_ops io_queue_depth_ops = { static unsigned int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); -MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096"); static int io_queue_count_set(const char *val, const struct kernel_param *kp) { @@ -135,6 +137,7 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; u32 last_ps; + bool hmb; mempool_t *iod_mempool; @@ -153,18 +156,14 @@ struct nvme_dev { unsigned int nr_allocated_queues; unsigned int nr_write_queues; unsigned int nr_poll_queues; + + bool attrs_added; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) { - int ret; - u32 n; - - ret = kstrtou32(val, 10, &n); - if (ret != 0 || n < 2) - return -EINVAL; - - return param_set_uint(val, kp); + return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE, + NVME_PCI_MAX_QUEUE_SIZE); } static inline unsigned int sq_idx(unsigned int qid, u32 stride) @@ -1014,7 +1013,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) return; } - req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id); + req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id); if (unlikely(!req)) { dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", @@ -1808,17 +1807,6 @@ static int nvme_create_io_queues(struct nvme_dev *dev) return ret >= 0 ? 
0 : ret; } -static ssize_t nvme_cmb_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); - - return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n", - ndev->cmbloc, ndev->cmbsz); -} -static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); - static u64 nvme_cmb_size_unit(struct nvme_dev *dev) { u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK; @@ -1887,20 +1875,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) pci_p2pmem_publish(pdev, true); - - if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, - &dev_attr_cmb.attr, NULL)) - dev_warn(dev->ctrl.device, - "failed to add sysfs attribute for CMB\n"); -} - -static inline void nvme_release_cmb(struct nvme_dev *dev) -{ - if (dev->cmb_size) { - sysfs_remove_file_from_group(&dev->ctrl.device->kobj, - &dev_attr_cmb.attr, NULL); - dev->cmb_size = 0; - } } static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) @@ -1923,7 +1897,9 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) dev_warn(dev->ctrl.device, "failed to set host mem (err %d, flags %#x).\n", ret, bits); - } + } else + dev->hmb = bits & NVME_HOST_MEM_ENABLE; + return ret; } @@ -2080,6 +2056,102 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) return ret; } +static ssize_t cmb_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz : x%08x\n", + ndev->cmbloc, ndev->cmbsz); +} +static DEVICE_ATTR_RO(cmb); + +static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", ndev->cmbloc); +} +static DEVICE_ATTR_RO(cmbloc); + +static ssize_t cmbsz_show(struct device *dev, struct device_attribute 
*attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%u\n", ndev->cmbsz); +} +static DEVICE_ATTR_RO(cmbsz); + +static ssize_t hmb_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%d\n", ndev->hmb); +} + +static ssize_t hmb_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + bool new; + int ret; + + if (strtobool(buf, &new) < 0) + return -EINVAL; + + if (new == ndev->hmb) + return count; + + if (new) { + ret = nvme_setup_host_mem(ndev); + } else { + ret = nvme_set_host_mem(ndev, 0); + if (!ret) + nvme_free_host_mem(ndev); + } + + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR_RW(hmb); + +static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct nvme_ctrl *ctrl = + dev_get_drvdata(container_of(kobj, struct device, kobj)); + struct nvme_dev *dev = to_nvme_dev(ctrl); + + if (a == &dev_attr_cmb.attr || + a == &dev_attr_cmbloc.attr || + a == &dev_attr_cmbsz.attr) { + if (!dev->cmbsz) + return 0; + } + if (a == &dev_attr_hmb.attr && !ctrl->hmpre) + return 0; + + return a->mode; +} + +static struct attribute *nvme_pci_attrs[] = { + &dev_attr_cmb.attr, + &dev_attr_cmbloc.attr, + &dev_attr_cmbsz.attr, + &dev_attr_hmb.attr, + NULL, +}; + +static const struct attribute_group nvme_pci_attr_group = { + .attrs = nvme_pci_attrs, + .is_visible = nvme_pci_attrs_are_visible, +}; + /* * nirqs is the number of interrupts available for write and read * queues. The core already reserved an interrupt for the admin queue. 
@@ -2751,6 +2823,10 @@ static void nvme_reset_work(struct work_struct *work) goto out; } + if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj, + &nvme_pci_attr_group)) + dev->attrs_added = true; + nvme_start_ctrl(&dev->ctrl); return; @@ -2999,6 +3075,13 @@ static void nvme_shutdown(struct pci_dev *pdev) nvme_disable_prepare_reset(dev, true); } +static void nvme_remove_attrs(struct nvme_dev *dev) +{ + if (dev->attrs_added) + sysfs_remove_group(&dev->ctrl.device->kobj, + &nvme_pci_attr_group); +} + /* * The driver's remove may be called on a device in a partially initialized * state. This function must not have any dependencies on the device state in @@ -3020,7 +3103,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_stop_ctrl(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); nvme_dev_disable(dev, true); - nvme_release_cmb(dev); + nvme_remove_attrs(dev); nvme_free_host_mem(dev); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); @@ -3047,8 +3130,13 @@ static int nvme_resume(struct device *dev) if (ndev->last_ps == U32_MAX || nvme_set_power_state(ctrl, ndev->last_ps) != 0) - return nvme_try_sched_reset(&ndev->ctrl); + goto reset; + if (ctrl->hmpre && nvme_setup_host_mem(ndev)) + goto reset; + return 0; +reset: + return nvme_try_sched_reset(ctrl); } static int nvme_suspend(struct device *dev) @@ -3072,15 +3160,9 @@ static int nvme_suspend(struct device *dev) * the PCI bus layer to put it into D3 in order to take the PCIe link * down, so as to allow the platform to achieve its minimum low-power * state (which may not be possible if the link is up). - * - * If a host memory buffer is enabled, shut down the device as the NVMe - * specification allows the device to access the host memory buffer in - * host DRAM from all power states, but hosts will fail access to DRAM - * during S3. 
*/ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || - ndev->nr_host_mem_descs || (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) return nvme_disable_prepare_reset(ndev, true); @@ -3091,6 +3173,17 @@ static int nvme_suspend(struct device *dev) if (ctrl->state != NVME_CTRL_LIVE) goto unfreeze; + /* + * Host memory access may not be successful in a system suspend state, + * but the specification allows the controller to access memory in a + * non-operational power state. + */ + if (ndev->hmb) { + ret = nvme_set_host_mem(ndev, 0); + if (ret < 0) + goto unfreeze; + } + ret = nvme_get_power_state(ctrl, &ndev->last_ps); if (ret < 0) goto unfreeze; @@ -3243,12 +3336,6 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, - { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, - { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, - { PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */ - .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 7f6b3a991501..a68704e39084 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -735,13 +735,13 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) return ret; - ctrl->ctrl.queue_count = nr_io_queues + 1; - if (ctrl->ctrl.queue_count < 2) { + if (nr_io_queues == 0) { dev_err(ctrl->ctrl.device, "unable to set any I/O queues\n"); return -ENOMEM; } + ctrl->ctrl.queue_count = nr_io_queues + 1; dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); @@ -1730,10 +1730,10 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, 
struct request *rq; struct nvme_rdma_request *req; - rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); + rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QP %#x not found\n", + "got bad command_id %#x on QP %#x\n", cqe->command_id, queue->qp->qp_num); nvme_rdma_error_recovery(queue->ctrl); return; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8cb15ee5b249..645025620154 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -487,11 +487,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, { struct request *rq; - rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), cqe->command_id); + rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, - "queue %d tag 0x%x not found\n", - nvme_tcp_queue_id(queue), cqe->command_id); + "got bad cqe.command_id %#x on queue %d\n", + cqe->command_id, nvme_tcp_queue_id(queue)); nvme_tcp_error_recovery(&queue->ctrl->ctrl); return -EINVAL; } @@ -508,11 +508,11 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, { struct request *rq; - rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, - "queue %d tag %#x not found\n", - nvme_tcp_queue_id(queue), pdu->command_id); + "got bad c2hdata.command_id %#x on queue %d\n", + pdu->command_id, nvme_tcp_queue_id(queue)); return -ENOENT; } @@ -606,7 +606,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, data->hdr.plen = cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); data->ttag = pdu->ttag; - data->command_id = rq->tag; + data->command_id = nvme_cid(rq); data->data_offset = cpu_to_le32(req->data_sent); data->data_length = cpu_to_le32(req->pdu_len); return 0; @@ -619,11 +619,11 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, struct 
request *rq; int ret; - rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, - "queue %d tag %#x not found\n", - nvme_tcp_queue_id(queue), pdu->command_id); + "got bad r2t.command_id %#x on queue %d\n", + pdu->command_id, nvme_tcp_queue_id(queue)); return -ENOENT; } req = blk_mq_rq_to_pdu(rq); @@ -702,17 +702,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; - struct nvme_tcp_request *req; - struct request *rq; - - rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); - if (!rq) { - dev_err(queue->ctrl->ctrl.device, - "queue %d tag %#x not found\n", - nvme_tcp_queue_id(queue), pdu->command_id); - return -ENOENT; - } - req = blk_mq_rq_to_pdu(rq); + struct request *rq = + nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); while (true) { int recv_len, ret; @@ -804,8 +796,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, } if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { - struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), - pdu->command_id); + struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), + pdu->command_id); nvme_tcp_end_request(rq, NVME_SC_SUCCESS); queue->nr_cqe++; @@ -1228,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) sock_release(queue->sock); kfree(queue->pdu); + mutex_destroy(&queue->send_mutex); mutex_destroy(&queue->queue_lock); } @@ -1533,6 +1526,7 @@ err_sock: sock_release(queue->sock); queue->sock = NULL; err_destroy_mutex: + mutex_destroy(&queue->send_mutex); mutex_destroy(&queue->queue_lock); return ret; } @@ -1769,13 +1763,13 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) if (ret) return ret; - ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 
2) { + if (nr_io_queues == 0) { dev_err(ctrl->device, "unable to set any I/O queues\n"); return -ENOMEM; } + ctrl->queue_count = nr_io_queues + 1; dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 6543015b6121..2a89c5aa0790 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -72,6 +72,20 @@ static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10) return ret; } +static const char *nvme_trace_admin_set_features(struct trace_seq *p, + u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 fid = cdw10[0]; + u8 sv = cdw10[3] & 0x8; + u32 cdw11 = get_unaligned_le32(cdw10 + 4); + + trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvme_trace_admin_get_features(struct trace_seq *p, u8 *cdw10) { @@ -80,7 +94,7 @@ static const char *nvme_trace_admin_get_features(struct trace_seq *p, u8 sel = cdw10[1] & 0x7; u32 cdw11 = get_unaligned_le32(cdw10 + 4); - trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11); + trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11); trace_seq_putc(p, 0); return ret; @@ -201,6 +215,8 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, return nvme_trace_create_cq(p, cdw10); case nvme_admin_identify: return nvme_trace_admin_identify(p, cdw10); + case nvme_admin_set_features: + return nvme_trace_admin_set_features(p, cdw10); case nvme_admin_get_features: return nvme_trace_admin_get_features(p, cdw10); case nvme_admin_get_lba_status: diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 4be2ececbc45..973561c93888 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -31,7 +31,6 @@ config NVME_TARGET_PASSTHRU config NVME_TARGET_LOOP tristate "NVMe loopback device support" depends on NVME_TARGET - select NVME_CORE select NVME_FABRICS select 
SG_POOL help @@ -65,7 +64,6 @@ config NVME_TARGET_FC config NVME_TARGET_FCLOOP tristate "NVMe over Fabrics FC Transport Loopback Test driver" depends on NVME_TARGET - select NVME_CORE select NVME_FABRICS select SG_POOL depends on NVME_FC diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ac7210a3ea1c..66d05eecc2a9 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -802,6 +802,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * controller teardown as a result of a keep-alive expiration. */ ctrl->reset_tbkas = true; + sq->ctrl->sqs[sq->qid] = NULL; nvmet_ctrl_put(ctrl); sq->ctrl = NULL; /* allows reusing the queue later */ } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 7d0f3523fdab..7d0454cee920 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -109,21 +109,38 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 sqsize = le16_to_cpu(c->sqsize); struct nvmet_ctrl *old; + u16 mqes = NVME_CAP_MQES(ctrl->cap); u16 ret; - old = cmpxchg(&req->sq->ctrl, NULL, ctrl); - if (old) { - pr_warn("queue already connected!\n"); - req->error_loc = offsetof(struct nvmf_connect_command, opcode); - return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; - } if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); + req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(sqsize); ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; goto err; } + if (ctrl->sqs[qid] != NULL) { + pr_warn("qid %u has already been created\n", qid); + req->error_loc = offsetof(struct nvmf_connect_command, qid); + return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + } + + if (sqsize > mqes) { + pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n", + sqsize, mqes, ctrl->cntlid); + req->error_loc = offsetof(struct nvmf_connect_command, sqsize); + req->cqe->result.u32 = 
IPO_IATTR_CONNECT_SQE(sqsize); + return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + } + + old = cmpxchg(&req->sq->ctrl, NULL, ctrl); + if (old) { + pr_warn("queue already connected!\n"); + req->error_loc = offsetof(struct nvmf_connect_command, opcode); + return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; + } + /* note: convert queue size from 0's-based value to 1's-based value */ nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); @@ -138,6 +155,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", qid, ctrl->cntlid, ret); + ctrl->sqs[qid] = NULL; goto err; } } @@ -260,11 +278,11 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_install_queue(ctrl, req); - if (status) { - /* pass back cntlid that had the issue of installing queue */ - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); + if (status) goto out_ctrl_put; - } + + /* pass back cntlid for successful completion */ + req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 3a17a7e26bbf..0285ccc7541f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -107,10 +107,10 @@ static void nvme_loop_queue_response(struct nvmet_req *req) } else { struct request *rq; - rq = blk_mq_tag_to_rq(nvme_loop_tagset(queue), cqe->command_id); + rq = nvme_find_rq(nvme_loop_tagset(queue), cqe->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on queue %d not found\n", + "got bad command_id %#x on queue %d\n", cqe->command_id, nvme_loop_queue_idx(queue)); return; } diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index 1373a3c67962..bff454d46255 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c @@ -27,7 +27,7 @@ static const char 
*nvmet_trace_admin_get_features(struct trace_seq *p, u8 sel = cdw10[1] & 0x7; u32 cdw11 = get_unaligned_le32(cdw10 + 4); - trace_seq_printf(p, "fid=0x%x sel=0x%x cdw11=0x%x", fid, sel, cdw11); + trace_seq_printf(p, "fid=0x%x, sel=0x%x, cdw11=0x%x", fid, sel, cdw11); trace_seq_putc(p, 0); return ret; @@ -49,6 +49,20 @@ static const char *nvmet_trace_get_lba_status(struct trace_seq *p, return ret; } +static const char *nvmet_trace_admin_set_features(struct trace_seq *p, + u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 fid = cdw10[0]; + u8 sv = cdw10[3] & 0x8; + u32 cdw11 = get_unaligned_le32(cdw10 + 4); + + trace_seq_printf(p, "fid=0x%x, sv=0x%x, cdw11=0x%x", fid, sv, cdw11); + trace_seq_putc(p, 0); + + return ret; +} + static const char *nvmet_trace_read_write(struct trace_seq *p, u8 *cdw10) { const char *ret = trace_seq_buffer_ptr(p); @@ -94,6 +108,8 @@ const char *nvmet_trace_parse_admin_cmd(struct trace_seq *p, switch (opcode) { case nvme_admin_identify: return nvmet_trace_admin_identify(p, cdw10); + case nvme_admin_set_features: + return nvmet_trace_admin_set_features(p, cdw10); case nvme_admin_get_features: return nvmet_trace_admin_get_features(p, cdw10); case nvme_admin_get_lba_status: diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 17f8b7a45f21..46bc30fe85d2 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -115,14 +115,11 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) } status = nvmet_req_find_ns(req); - if (status) { - status = NVME_SC_INTERNAL; + if (status) goto done; - } if (!bdev_is_zoned(req->ns->bdev)) { req->error_loc = offsetof(struct nvme_identify, nsid); - status = NVME_SC_INVALID_NS | NVME_SC_DNR; goto done; } |