diff options
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Kconfig | 13 | ||||
-rw-r--r-- | drivers/nvme/host/Makefile | 3 | ||||
-rw-r--r-- | drivers/nvme/host/apple.c | 1593 | ||||
-rw-r--r-- | drivers/nvme/host/constants.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 151 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 8 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/ioctl.c | 279 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 1 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 11 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 9 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/passthru.c | 5 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/zns.c | 3 |
17 files changed, 2067 insertions, 77 deletions
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d6d056963c06..877d2ec4ea9f 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -91,3 +91,16 @@ config NVME_TCP from https://github.com/linux-nvme/nvme-cli. If unsure, say N. + +config NVME_APPLE + tristate "Apple ANS2 NVM Express host driver" + depends on OF && BLOCK + depends on APPLE_RTKIT && APPLE_SART + depends on ARCH_APPLE || COMPILE_TEST + select NVME_CORE + help + This provides support for the NVMe controller embedded in Apple SoCs + such as the M1. + + To compile this driver as a module, choose M here: the + module will be called nvme-apple. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 476c5c988496..a36ae1612059 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o obj-$(CONFIG_NVME_TCP) += nvme-tcp.o +obj-$(CONFIG_NVME_APPLE) += nvme-apple.o nvme-core-y := core.o ioctl.o constants.o nvme-core-$(CONFIG_TRACING) += trace.o @@ -25,3 +26,5 @@ nvme-rdma-y += rdma.o nvme-fc-y += fc.o nvme-tcp-y += tcp.o + +nvme-apple-y += apple.o diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c new file mode 100644 index 000000000000..d702d7d60235 --- /dev/null +++ b/drivers/nvme/host/apple.c @@ -0,0 +1,1593 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Apple ANS NVM Express device driver + * Copyright The Asahi Linux Contributors + * + * Based on the pci.c NVM Express device driver + * Copyright (c) 2011-2014, Intel Corporation. + * and on the rdma.c NVMe over Fabrics RDMA host code. + * Copyright (c) 2015-2016 HGST, a Western Digital Company. + */ + +#include <linux/async.h> +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/jiffies.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/once.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/soc/apple/rtkit.h> +#include <linux/soc/apple/sart.h> +#include <linux/reset.h> +#include <linux/time64.h> + +#include "nvme.h" + +#define APPLE_ANS_BOOT_TIMEOUT USEC_PER_SEC +#define APPLE_ANS_MAX_QUEUE_DEPTH 64 + +#define APPLE_ANS_COPROC_CPU_CONTROL 0x44 +#define APPLE_ANS_COPROC_CPU_CONTROL_RUN BIT(4) + +#define APPLE_ANS_ACQ_DB 0x1004 +#define APPLE_ANS_IOCQ_DB 0x100c + +#define APPLE_ANS_MAX_PEND_CMDS_CTRL 0x1210 + +#define APPLE_ANS_BOOT_STATUS 0x1300 +#define APPLE_ANS_BOOT_STATUS_OK 0xde71ce55 + +#define APPLE_ANS_UNKNOWN_CTRL 0x24008 +#define APPLE_ANS_PRP_NULL_CHECK BIT(11) + +#define APPLE_ANS_LINEAR_SQ_CTRL 0x24908 +#define APPLE_ANS_LINEAR_SQ_EN BIT(0) + +#define APPLE_ANS_LINEAR_ASQ_DB 0x2490c +#define APPLE_ANS_LINEAR_IOSQ_DB 0x24910 + +#define APPLE_NVMMU_NUM_TCBS 0x28100 +#define APPLE_NVMMU_ASQ_TCB_BASE 0x28108 +#define APPLE_NVMMU_IOSQ_TCB_BASE 0x28110 +#define APPLE_NVMMU_TCB_INVAL 0x28118 +#define APPLE_NVMMU_TCB_STAT 0x28120 + +/* + * This controller is a bit weird in the way command tags works: Both the + * admin and the IO queue share the same tag space. Additionally, tags + * cannot be higher than 0x40 which effectively limits the combined + * queue depth to 0x40. Instead of wasting half of that on the admin queue + * which gets much less traffic we instead reduce its size here. + * The controller also doesn't support async event such that no space must + * be reserved for NVME_NR_AEN_COMMANDS. + */ +#define APPLE_NVME_AQ_DEPTH 2 +#define APPLE_NVME_AQ_MQ_TAG_DEPTH (APPLE_NVME_AQ_DEPTH - 1) + +/* + * These can be higher, but we need to ensure that any command doesn't + * require an sg allocation that needs more than a page of data. + */ +#define NVME_MAX_KB_SZ 4096 +#define NVME_MAX_SEGS 127 + +/* + * This controller comes with an embedded IOMMU known as NVMMU. + * The NVMMU is pointed to an array of TCBs indexed by the command tag. + * Each command must be configured inside this structure before it's allowed + * to execute, including commands that don't require DMA transfers. + * + * An exception to this are Apple's vendor-specific commands (opcode 0xD8 on the + * admin queue): Those commands must still be added to the NVMMU but the DMA + * buffers cannot be represented as PRPs and must instead be allowed using SART. + * + * Programming the PRPs to the same values as those in the submission queue + * looks rather silly at first. This hardware is however designed for a kernel + * that runs the NVMMU code in a higher exception level than the NVMe driver. + * In that setting the NVMe driver first programs the submission queue entry + * and then executes a hypercall to the code that is allowed to program the + * NVMMU. The NVMMU driver then creates a shadow copy of the PRPs while + * verifying that they don't point to kernel text, data, pagetables, or similar + * protected areas before programming the TCB to point to this shadow copy. + * Since Linux doesn't do any of that we may as well just point both the queue + * and the TCB PRP pointer to the same memory. + */ +struct apple_nvmmu_tcb { + u8 opcode; + +#define APPLE_ANS_TCB_DMA_FROM_DEVICE BIT(0) +#define APPLE_ANS_TCB_DMA_TO_DEVICE BIT(1) + u8 dma_flags; + + u8 command_id; + u8 _unk0; + __le16 length; + u8 _unk1[18]; + __le64 prp1; + __le64 prp2; + u8 _unk2[16]; + u8 aes_iv[8]; + u8 _aes_unk[64]; +}; + +/* + * The Apple NVMe controller only supports a single admin and a single IO queue + * which are both limited to 64 entries and share a single interrupt. + * + * The completion queue works as usual. The submission "queue" instead is + * an array indexed by the command tag on this hardware. Commands must also be + * present in the NVMMU's tcb array. They are triggered by writing their tag to + * a MMIO register. + */ +struct apple_nvme_queue { + struct nvme_command *sqes; + struct nvme_completion *cqes; + struct apple_nvmmu_tcb *tcbs; + + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + dma_addr_t tcb_dma_addr; + + u32 __iomem *sq_db; + u32 __iomem *cq_db; + + u16 cq_head; + u8 cq_phase; + + bool is_adminq; + bool enabled; +}; + +/* + * The apple_nvme_iod describes the data in an I/O. + * + * The sg pointer contains the list of PRP chunk allocations in addition + * to the actual struct scatterlist. + */ +struct apple_nvme_iod { + struct nvme_request req; + struct nvme_command cmd; + struct apple_nvme_queue *q; + int npages; /* In the PRP list. 0 means small pool in use */ + int nents; /* Used in scatterlist */ + dma_addr_t first_dma; + unsigned int dma_len; /* length of single DMA segment mapping */ + struct scatterlist *sg; +}; + +struct apple_nvme { + struct device *dev; + + void __iomem *mmio_coproc; + void __iomem *mmio_nvme; + + struct device **pd_dev; + struct device_link **pd_link; + int pd_count; + + struct apple_sart *sart; + struct apple_rtkit *rtk; + struct reset_control *reset; + + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + mempool_t *iod_mempool; + + struct nvme_ctrl ctrl; + struct work_struct remove_work; + + struct apple_nvme_queue adminq; + struct apple_nvme_queue ioq; + + struct blk_mq_tag_set admin_tagset; + struct blk_mq_tag_set tagset; + + int irq; + spinlock_t lock; +}; + +static_assert(sizeof(struct nvme_command) == 64); +static_assert(sizeof(struct apple_nvmmu_tcb) == 128); + +static inline struct apple_nvme *ctrl_to_apple_nvme(struct nvme_ctrl *ctrl) +{ + return container_of(ctrl, struct apple_nvme, ctrl); +} + +static inline struct apple_nvme *queue_to_apple_nvme(struct apple_nvme_queue *q) +{ + if (q->is_adminq) + return container_of(q, struct apple_nvme, adminq); + else + return container_of(q, struct apple_nvme, ioq); +} + +static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q) +{ + if (q->is_adminq) + return APPLE_NVME_AQ_DEPTH; + else + return APPLE_ANS_MAX_QUEUE_DEPTH; +} + +static void apple_nvme_rtkit_crashed(void *cookie) +{ + struct apple_nvme *anv = cookie; + + dev_warn(anv->dev, "RTKit crashed; unable to recover without a reboot"); + nvme_reset_ctrl(&anv->ctrl); +} + +static int apple_nvme_sart_dma_setup(void *cookie, + struct apple_rtkit_shmem *bfr) +{ + struct apple_nvme *anv = cookie; + int ret; + + if (bfr->iova) + return -EINVAL; + if (!bfr->size) + return -EINVAL; + + bfr->buffer = + dma_alloc_coherent(anv->dev, bfr->size, &bfr->iova, GFP_KERNEL); + if (!bfr->buffer) + return -ENOMEM; + + ret = apple_sart_add_allowed_region(anv->sart, bfr->iova, bfr->size); + if (ret) { + dma_free_coherent(anv->dev, bfr->size, bfr->buffer, bfr->iova); + bfr->buffer = NULL; + return -ENOMEM; + } + + return 0; +} + +static void apple_nvme_sart_dma_destroy(void *cookie, + struct apple_rtkit_shmem *bfr) +{ + struct apple_nvme *anv = cookie; + + apple_sart_remove_allowed_region(anv->sart, bfr->iova, bfr->size); + dma_free_coherent(anv->dev, bfr->size, bfr->buffer, bfr->iova); +} + +static const struct apple_rtkit_ops apple_nvme_rtkit_ops = { + .crashed = apple_nvme_rtkit_crashed, + .shmem_setup = apple_nvme_sart_dma_setup, + .shmem_destroy = apple_nvme_sart_dma_destroy, +}; + +static void apple_nvmmu_inval(struct apple_nvme_queue *q, unsigned int tag) +{ + struct apple_nvme *anv = queue_to_apple_nvme(q); + + writel(tag, anv->mmio_nvme + APPLE_NVMMU_TCB_INVAL); + if (readl(anv->mmio_nvme + APPLE_NVMMU_TCB_STAT)) + dev_warn_ratelimited(anv->dev, + "NVMMU TCB invalidation failed\n"); +} + +static void apple_nvme_submit_cmd(struct apple_nvme_queue *q, + struct nvme_command *cmd) +{ + struct apple_nvme *anv = queue_to_apple_nvme(q); + u32 tag = nvme_tag_from_cid(cmd->common.command_id); + struct apple_nvmmu_tcb *tcb = &q->tcbs[tag]; + + tcb->opcode = cmd->common.opcode; + tcb->prp1 = cmd->common.dptr.prp1; + tcb->prp2 = cmd->common.dptr.prp2; + tcb->length = cmd->rw.length; + tcb->command_id = tag; + + if (nvme_is_write(cmd)) + tcb->dma_flags = APPLE_ANS_TCB_DMA_TO_DEVICE; + else + tcb->dma_flags = APPLE_ANS_TCB_DMA_FROM_DEVICE; + + memcpy(&q->sqes[tag], cmd, sizeof(*cmd)); + + /* + * This lock here doesn't make much sense at a first glace but + * removing it will result in occasional missed completetion + * interrupts even though the commands still appear on the CQ. + * It's unclear why this happens but our best guess is that + * there is a bug in the firmware triggered when a new command + * is issued while we're inside the irq handler between the + * NVMMU invalidation (and making the tag available again) + * and the final CQ update. + */ + spin_lock_irq(&anv->lock); + writel(tag, q->sq_db); + spin_unlock_irq(&anv->lock); +} + +/* + * From pci.c: + * Will slightly overestimate the number of pages needed. This is OK + * as it only leads to a small amount of wasted memory for the lifetime of + * the I/O. + */ +static inline size_t apple_nvme_iod_alloc_size(void) +{ + const unsigned int nprps = DIV_ROUND_UP( + NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE, NVME_CTRL_PAGE_SIZE); + const int npages = DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); + const size_t alloc_size = sizeof(__le64 *) * npages + + sizeof(struct scatterlist) * NVME_MAX_SEGS; + + return alloc_size; +} + +static void **apple_nvme_iod_list(struct request *req) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + + return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); +} + +static void apple_nvme_free_prps(struct apple_nvme *anv, struct request *req) +{ + const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = iod->first_dma; + int i; + + for (i = 0; i < iod->npages; i++) { + __le64 *prp_list = apple_nvme_iod_list(req)[i]; + dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); + + dma_pool_free(anv->prp_page_pool, prp_list, dma_addr); + dma_addr = next_dma_addr; + } +} + +static void apple_nvme_unmap_data(struct apple_nvme *anv, struct request *req) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + + if (iod->dma_len) { + dma_unmap_page(anv->dev, iod->first_dma, iod->dma_len, + rq_dma_dir(req)); + return; + } + + WARN_ON_ONCE(!iod->nents); + + dma_unmap_sg(anv->dev, iod->sg, iod->nents, rq_dma_dir(req)); + if (iod->npages == 0) + dma_pool_free(anv->prp_small_pool, apple_nvme_iod_list(req)[0], + iod->first_dma); + else + apple_nvme_free_prps(anv, req); + mempool_free(iod->sg, anv->iod_mempool); +} + +static void apple_nvme_print_sgl(struct scatterlist *sgl, int nents) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + dma_addr_t phys = sg_phys(sg); + + pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d dma_address:%pad dma_length:%d\n", + i, &phys, sg->offset, sg->length, &sg_dma_address(sg), + sg_dma_len(sg)); + } +} + +static blk_status_t apple_nvme_setup_prps(struct apple_nvme *anv, + struct request *req, + struct nvme_rw_command *cmnd) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct dma_pool *pool; + int length = blk_rq_payload_bytes(req); + struct scatterlist *sg = iod->sg; + int dma_len = sg_dma_len(sg); + u64 dma_addr = sg_dma_address(sg); + int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); + __le64 *prp_list; + void **list = apple_nvme_iod_list(req); + dma_addr_t prp_dma; + int nprps, i; + + length -= (NVME_CTRL_PAGE_SIZE - offset); + if (length <= 0) { + iod->first_dma = 0; + goto done; + } + + dma_len -= (NVME_CTRL_PAGE_SIZE - offset); + if (dma_len) { + dma_addr += (NVME_CTRL_PAGE_SIZE - offset); + } else { + sg = sg_next(sg); + dma_addr = sg_dma_address(sg); + dma_len = sg_dma_len(sg); + } + + if (length <= NVME_CTRL_PAGE_SIZE) { + iod->first_dma = dma_addr; + goto done; + } + + nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE); + if (nprps <= (256 / 8)) { + pool = anv->prp_small_pool; + iod->npages = 0; + } else { + pool = anv->prp_page_pool; + iod->npages = 1; + } + + prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); + if (!prp_list) { + iod->first_dma = dma_addr; + iod->npages = -1; + return BLK_STS_RESOURCE; + } + list[0] = prp_list; + iod->first_dma = prp_dma; + i = 0; + for (;;) { + if (i == NVME_CTRL_PAGE_SIZE >> 3) { + __le64 *old_prp_list = prp_list; + + prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); + if (!prp_list) + goto free_prps; + list[iod->npages++] = prp_list; + prp_list[0] = old_prp_list[i - 1]; + old_prp_list[i - 1] = cpu_to_le64(prp_dma); + i = 1; + } + prp_list[i++] = cpu_to_le64(dma_addr); + dma_len -= NVME_CTRL_PAGE_SIZE; + dma_addr += NVME_CTRL_PAGE_SIZE; + length -= NVME_CTRL_PAGE_SIZE; + if (length <= 0) + break; + if (dma_len > 0) + continue; + if (unlikely(dma_len < 0)) + goto bad_sgl; + sg = sg_next(sg); + dma_addr = sg_dma_address(sg); + dma_len = sg_dma_len(sg); + } +done: + cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); + return BLK_STS_OK; +free_prps: + apple_nvme_free_prps(anv, req); + return BLK_STS_RESOURCE; +bad_sgl: + WARN(DO_ONCE(apple_nvme_print_sgl, iod->sg, iod->nents), + "Invalid SGL for payload:%d nents:%d\n", blk_rq_payload_bytes(req), + iod->nents); + return BLK_STS_IOERR; +} + +static blk_status_t apple_nvme_setup_prp_simple(struct apple_nvme *anv, + struct request *req, + struct nvme_rw_command *cmnd, + struct bio_vec *bv) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1); + unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset; + + iod->first_dma = dma_map_bvec(anv->dev, bv, rq_dma_dir(req), 0); + if (dma_mapping_error(anv->dev, iod->first_dma)) + return BLK_STS_RESOURCE; + iod->dma_len = bv->bv_len; + + cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); + if (bv->bv_len > first_prp_len) + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + return BLK_STS_OK; +} + +static blk_status_t apple_nvme_map_data(struct apple_nvme *anv, + struct request *req, + struct nvme_command *cmnd) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + blk_status_t ret = BLK_STS_RESOURCE; + int nr_mapped; + + if (blk_rq_nr_phys_segments(req) == 1) { + struct bio_vec bv = req_bvec(req); + + if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2) + return apple_nvme_setup_prp_simple(anv, req, &cmnd->rw, + &bv); + } + + iod->dma_len = 0; + iod->sg = mempool_alloc(anv->iod_mempool, GFP_ATOMIC); + if (!iod->sg) + return BLK_STS_RESOURCE; + sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); + iod->nents = blk_rq_map_sg(req->q, req, iod->sg); + if (!iod->nents) + goto out_free_sg; + + nr_mapped = dma_map_sg_attrs(anv->dev, iod->sg, iod->nents, + rq_dma_dir(req), DMA_ATTR_NO_WARN); + if (!nr_mapped) + goto out_free_sg; + + ret = apple_nvme_setup_prps(anv, req, &cmnd->rw); + if (ret != BLK_STS_OK) + goto out_unmap_sg; + return BLK_STS_OK; + +out_unmap_sg: + dma_unmap_sg(anv->dev, iod->sg, iod->nents, rq_dma_dir(req)); +out_free_sg: + mempool_free(iod->sg, anv->iod_mempool); + return ret; +} + +static __always_inline void apple_nvme_unmap_rq(struct request *req) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct apple_nvme *anv = queue_to_apple_nvme(iod->q); + + if (blk_rq_nr_phys_segments(req)) + apple_nvme_unmap_data(anv, req); +} + +static void apple_nvme_complete_rq(struct request *req) +{ + apple_nvme_unmap_rq(req); + nvme_complete_rq(req); +} + +static void apple_nvme_complete_batch(struct io_comp_batch *iob) +{ + nvme_complete_batch(iob, apple_nvme_unmap_rq); +} + +static inline bool apple_nvme_cqe_pending(struct apple_nvme_queue *q) +{ + struct nvme_completion *hcqe = &q->cqes[q->cq_head]; + + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == q->cq_phase; +} + +static inline struct blk_mq_tags * +apple_nvme_queue_tagset(struct apple_nvme *anv, struct apple_nvme_queue *q) +{ + if (q->is_adminq) + return anv->admin_tagset.tags[0]; + else + return anv->tagset.tags[0]; +} + +static inline void apple_nvme_handle_cqe(struct apple_nvme_queue *q, + struct io_comp_batch *iob, u16 idx) +{ + struct apple_nvme *anv = queue_to_apple_nvme(q); + struct nvme_completion *cqe = &q->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); + struct request *req; + + apple_nvmmu_inval(q, command_id); + + req = nvme_find_rq(apple_nvme_queue_tagset(anv, q), command_id); + if (unlikely(!req)) { + dev_warn(anv->dev, "invalid id %d completed", command_id); + return; + } + + if (!nvme_try_complete_req(req, cqe->status, cqe->result) && + !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, + apple_nvme_complete_batch)) + apple_nvme_complete_rq(req); +} + +static inline void apple_nvme_update_cq_head(struct apple_nvme_queue *q) +{ + u32 tmp = q->cq_head + 1; + + if (tmp == apple_nvme_queue_depth(q)) { + q->cq_head = 0; + q->cq_phase ^= 1; + } else { + q->cq_head = tmp; + } +} + +static bool apple_nvme_poll_cq(struct apple_nvme_queue *q, + struct io_comp_batch *iob) +{ + bool found = false; + + while (apple_nvme_cqe_pending(q)) { + found = true; + + /* + * load-load control dependency between phase and the rest of + * the cqe requires a full read memory barrier + */ + dma_rmb(); + apple_nvme_handle_cqe(q, iob, q->cq_head); + apple_nvme_update_cq_head(q); + } + + if (found) + writel(q->cq_head, q->cq_db); + + return found; +} + +static bool apple_nvme_handle_cq(struct apple_nvme_queue *q, bool force) +{ + bool found; + DEFINE_IO_COMP_BATCH(iob); + + if (!READ_ONCE(q->enabled) && !force) + return false; + + found = apple_nvme_poll_cq(q, &iob); + + if (!rq_list_empty(iob.req_list)) + apple_nvme_complete_batch(&iob); + + return found; +} + +static irqreturn_t apple_nvme_irq(int irq, void *data) +{ + struct apple_nvme *anv = data; + bool handled = false; + unsigned long flags; + + spin_lock_irqsave(&anv->lock, flags); + if (apple_nvme_handle_cq(&anv->ioq, false)) + handled = true; + if (apple_nvme_handle_cq(&anv->adminq, false)) + handled = true; + spin_unlock_irqrestore(&anv->lock, flags); + + if (handled) + return IRQ_HANDLED; + return IRQ_NONE; +} + +static int apple_nvme_create_cq(struct apple_nvme *anv) +{ + struct nvme_command c = {}; + + /* + * Note: we (ab)use the fact that the prp fields survive if no data + * is attached to the request. + */ + c.create_cq.opcode = nvme_admin_create_cq; + c.create_cq.prp1 = cpu_to_le64(anv->ioq.cq_dma_addr); + c.create_cq.cqid = cpu_to_le16(1); + c.create_cq.qsize = cpu_to_le16(APPLE_ANS_MAX_QUEUE_DEPTH - 1); + c.create_cq.cq_flags = cpu_to_le16(NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED); + c.create_cq.irq_vector = cpu_to_le16(0); + + return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0); +} + +static int apple_nvme_remove_cq(struct apple_nvme *anv) +{ + struct nvme_command c = {}; + + c.delete_queue.opcode = nvme_admin_delete_cq; + c.delete_queue.qid = cpu_to_le16(1); + + return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0); +} + +static int apple_nvme_create_sq(struct apple_nvme *anv) +{ + struct nvme_command c = {}; + + /* + * Note: we (ab)use the fact that the prp fields survive if no data + * is attached to the request. + */ + c.create_sq.opcode = nvme_admin_create_sq; + c.create_sq.prp1 = cpu_to_le64(anv->ioq.sq_dma_addr); + c.create_sq.sqid = cpu_to_le16(1); + c.create_sq.qsize = cpu_to_le16(APPLE_ANS_MAX_QUEUE_DEPTH - 1); + c.create_sq.sq_flags = cpu_to_le16(NVME_QUEUE_PHYS_CONTIG); + c.create_sq.cqid = cpu_to_le16(1); + + return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0); +} + +static int apple_nvme_remove_sq(struct apple_nvme *anv) +{ + struct nvme_command c = {}; + + c.delete_queue.opcode = nvme_admin_delete_sq; + c.delete_queue.qid = cpu_to_le16(1); + + return nvme_submit_sync_cmd(anv->ctrl.admin_q, &c, NULL, 0); +} + +static blk_status_t apple_nvme_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nvme_ns *ns = hctx->queue->queuedata; + struct apple_nvme_queue *q = hctx->driver_data; + struct apple_nvme *anv = queue_to_apple_nvme(q); + struct request *req = bd->rq; + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_command *cmnd = &iod->cmd; + blk_status_t ret; + + iod->npages = -1; + iod->nents = 0; + + /* + * We should not need to do this, but we're still using this to + * ensure we can drain requests on a dying queue. + */ + if (unlikely(!READ_ONCE(q->enabled))) + return BLK_STS_IOERR; + + if (!nvme_check_ready(&anv->ctrl, req, true)) + return nvme_fail_nonready_command(&anv->ctrl, req); + + ret = nvme_setup_cmd(ns, req); + if (ret) + return ret; + + if (blk_rq_nr_phys_segments(req)) { + ret = apple_nvme_map_data(anv, req, cmnd); + if (ret) + goto out_free_cmd; + } + + blk_mq_start_request(req); + apple_nvme_submit_cmd(q, cmnd); + return BLK_STS_OK; + +out_free_cmd: + nvme_cleanup_cmd(req); + return ret; +} + +static int apple_nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int hctx_idx) +{ + hctx->driver_data = data; + return 0; +} + +static int apple_nvme_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) +{ + struct apple_nvme_queue *q = set->driver_data; + struct apple_nvme *anv = queue_to_apple_nvme(q); + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_request *nreq = nvme_req(req); + + iod->q = q; + nreq->ctrl = &anv->ctrl; + nreq->cmd = &iod->cmd; + + return 0; +} + +static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) +{ + u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); + bool dead = false, freeze = false; + unsigned long flags; + + if (apple_rtkit_is_crashed(anv->rtk)) + dead = true; + if (!(csts & NVME_CSTS_RDY)) + dead = true; + if (csts & NVME_CSTS_CFS) + dead = true; + + if (anv->ctrl.state == NVME_CTRL_LIVE || + anv->ctrl.state == NVME_CTRL_RESETTING) { + freeze = true; + nvme_start_freeze(&anv->ctrl); + } + + /* + * Give the controller a chance to complete all entered requests if + * doing a safe shutdown. + */ + if (!dead && shutdown && freeze) + nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT); + + nvme_stop_queues(&anv->ctrl); + + if (!dead) { + if (READ_ONCE(anv->ioq.enabled)) { + apple_nvme_remove_sq(anv); + apple_nvme_remove_cq(anv); + } + + if (shutdown) + nvme_shutdown_ctrl(&anv->ctrl); + nvme_disable_ctrl(&anv->ctrl); + } + + WRITE_ONCE(anv->ioq.enabled, false); + WRITE_ONCE(anv->adminq.enabled, false); + mb(); /* ensure that nvme_queue_rq() sees that enabled is cleared */ + nvme_stop_admin_queue(&anv->ctrl); + + /* last chance to complete any requests before nvme_cancel_request */ + spin_lock_irqsave(&anv->lock, flags); + apple_nvme_handle_cq(&anv->ioq, true); + apple_nvme_handle_cq(&anv->adminq, true); + spin_unlock_irqrestore(&anv->lock, flags); + + blk_mq_tagset_busy_iter(&anv->tagset, nvme_cancel_request, &anv->ctrl); + blk_mq_tagset_busy_iter(&anv->admin_tagset, nvme_cancel_request, + &anv->ctrl); + blk_mq_tagset_wait_completed_request(&anv->tagset); + blk_mq_tagset_wait_completed_request(&anv->admin_tagset); + + /* + * The driver will not be starting up queues again if shutting down so + * must flush all entered requests to their failed completion to avoid + * deadlocking blk-mq hot-cpu notifier. + */ + if (shutdown) { + nvme_start_queues(&anv->ctrl); + nvme_start_admin_queue(&anv->ctrl); + } +} + +static enum blk_eh_timer_return apple_nvme_timeout(struct request *req, + bool reserved) +{ + struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct apple_nvme_queue *q = iod->q; + struct apple_nvme *anv = queue_to_apple_nvme(q); + unsigned long flags; + u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); + + if (anv->ctrl.state != NVME_CTRL_LIVE) { + /* + * From rdma.c: + * If we are resetting, connecting or deleting we should + * complete immediately because we may block controller + * teardown or setup sequence + * - ctrl disable/shutdown fabrics requests + * - connect requests + * - initialization admin requests + * - I/O requests that entered after unquiescing and + * the controller stopped responding + * + * All other requests should be cancelled by the error + * recovery work, so it's fine that we fail it here. + */ + dev_warn(anv->dev, + "I/O %d(aq:%d) timeout while not in live state\n", + req->tag, q->is_adminq); + if (blk_mq_request_started(req) && + !blk_mq_request_completed(req)) { + nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; + blk_mq_complete_request(req); + } + return BLK_EH_DONE; + } + + /* check if we just missed an interrupt if we're still alive */ + if (!apple_rtkit_is_crashed(anv->rtk) && !(csts & NVME_CSTS_CFS)) { + spin_lock_irqsave(&anv->lock, flags); + apple_nvme_handle_cq(q, false); + spin_unlock_irqrestore(&anv->lock, flags); + if (blk_mq_request_completed(req)) { + dev_warn(anv->dev, + "I/O %d(aq:%d) timeout: completion polled\n", + req->tag, q->is_adminq); + return BLK_EH_DONE; + } + } + + /* + * aborting commands isn't supported which leaves a full reset as our + * only option here + */ + dev_warn(anv->dev, "I/O %d(aq:%d) timeout: resetting controller\n", + req->tag, q->is_adminq); + nvme_req(req)->flags |= NVME_REQ_CANCELLED; + apple_nvme_disable(anv, false); + nvme_reset_ctrl(&anv->ctrl); + return BLK_EH_DONE; +} + +static int apple_nvme_poll(struct blk_mq_hw_ctx *hctx, + struct io_comp_batch *iob) +{ + struct apple_nvme_queue *q = hctx->driver_data; + struct apple_nvme *anv = queue_to_apple_nvme(q); + bool found; + unsigned long flags; + + spin_lock_irqsave(&anv->lock, flags); + found = apple_nvme_poll_cq(q, iob); + spin_unlock_irqrestore(&anv->lock, flags); + + return found; +} + +static const struct blk_mq_ops apple_nvme_mq_admin_ops = { + .queue_rq = apple_nvme_queue_rq, + .complete = apple_nvme_complete_rq, + .init_hctx = apple_nvme_init_hctx, + .init_request = apple_nvme_init_request, + .timeout = apple_nvme_timeout, +}; + +static const struct blk_mq_ops apple_nvme_mq_ops = { + .queue_rq = apple_nvme_queue_rq, + .complete = apple_nvme_complete_rq, + .init_hctx = apple_nvme_init_hctx, + .init_request = apple_nvme_init_request, + .timeout = apple_nvme_timeout, + .poll = apple_nvme_poll, +}; + +static void apple_nvme_init_queue(struct apple_nvme_queue *q) +{ + unsigned int depth = apple_nvme_queue_depth(q); + + q->cq_head = 0; + q->cq_phase = 1; + memset(q->tcbs, 0, + APPLE_ANS_MAX_QUEUE_DEPTH * sizeof(struct apple_nvmmu_tcb)); + memset(q->cqes, 0, depth * sizeof(struct nvme_completion)); + WRITE_ONCE(q->enabled, true); + wmb(); /* ensure the first interrupt sees the initialization */ +} + +static void apple_nvme_reset_work(struct work_struct *work) +{ + unsigned int nr_io_queues = 1; + int ret; + u32 boot_status, aqa; + struct apple_nvme *anv = + container_of(work, struct apple_nvme, ctrl.reset_work); + + if (anv->ctrl.state != NVME_CTRL_RESETTING) { + dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", + anv->ctrl.state); + ret = -ENODEV; + goto out; + } + + /* there's unfortunately no known way to recover if RTKit crashed :( */ + if (apple_rtkit_is_crashed(anv->rtk)) { + dev_err(anv->dev, + "RTKit has crashed without any way to recover."); + ret = -EIO; + goto out; + } + + if (anv->ctrl.ctrl_config & NVME_CC_ENABLE) + apple_nvme_disable(anv, false); + + /* RTKit must be shut down cleanly for the (soft)-reset to work */ + if (apple_rtkit_is_running(anv->rtk)) { + dev_dbg(anv->dev, "Trying to shut down RTKit before reset."); + ret = apple_rtkit_shutdown(anv->rtk); + if (ret) + goto out; + } + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + + ret = reset_control_assert(anv->reset); + if (ret) + goto out; + + ret = apple_rtkit_reinit(anv->rtk); + if (ret) + goto out; + + ret = reset_control_deassert(anv->reset); + if (ret) + goto out; + + writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN, + anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + ret = apple_rtkit_boot(anv->rtk); + if (ret) { + dev_err(anv->dev, "ANS did not boot"); + goto out; + } + + ret = readl_poll_timeout(anv->mmio_nvme + APPLE_ANS_BOOT_STATUS, + boot_status, + boot_status == APPLE_ANS_BOOT_STATUS_OK, + USEC_PER_MSEC, APPLE_ANS_BOOT_TIMEOUT); + if (ret) { + dev_err(anv->dev, "ANS did not initialize"); + goto out; + } + + dev_dbg(anv->dev, "ANS booted successfully."); + + /* + * Limit the max command size to prevent iod->sg allocations going + * over a single page. + */ + anv->ctrl.max_hw_sectors = min_t(u32, NVME_MAX_KB_SZ << 1, + dma_max_mapping_size(anv->dev) >> 9); + anv->ctrl.max_segments = NVME_MAX_SEGS; + + /* + * Enable NVMMU and linear submission queues. + * While we could keep those disabled and pretend this is slightly + * more common NVMe controller we'd still need some quirks (e.g. + * sq entries will be 128 bytes) and Apple might drop support for + * that mode in the future. + */ + writel(APPLE_ANS_LINEAR_SQ_EN, + anv->mmio_nvme + APPLE_ANS_LINEAR_SQ_CTRL); + + /* Allow as many pending command as possible for both queues */ + writel(APPLE_ANS_MAX_QUEUE_DEPTH | (APPLE_ANS_MAX_QUEUE_DEPTH << 16), + anv->mmio_nvme + APPLE_ANS_MAX_PEND_CMDS_CTRL); + + /* Setup the NVMMU for the maximum admin and IO queue depth */ + writel(APPLE_ANS_MAX_QUEUE_DEPTH - 1, + anv->mmio_nvme + APPLE_NVMMU_NUM_TCBS); + + /* + * This is probably a chicken bit: without it all commands where any PRP + * is set to zero (including those that don't use that field) fail and + * the co-processor complains about "completed with err BAD_CMD-" or + * a "NULL_PRP_PTR_ERR" in the syslog + */ + writel(readl(anv->mmio_nvme + APPLE_ANS_UNKNOWN_CTRL) & + ~APPLE_ANS_PRP_NULL_CHECK, + anv->mmio_nvme + APPLE_ANS_UNKNOWN_CTRL); + + /* Setup the admin queue */ + aqa = APPLE_NVME_AQ_DEPTH - 1; + aqa |= aqa << 16; + writel(aqa, anv->mmio_nvme + NVME_REG_AQA); + writeq(anv->adminq.sq_dma_addr, anv->mmio_nvme + NVME_REG_ASQ); + writeq(anv->adminq.cq_dma_addr, anv->mmio_nvme + NVME_REG_ACQ); + + /* Setup NVMMU for both queues */ + writeq(anv->adminq.tcb_dma_addr, + anv->mmio_nvme + APPLE_NVMMU_ASQ_TCB_BASE); + writeq(anv->ioq.tcb_dma_addr, + anv->mmio_nvme + APPLE_NVMMU_IOSQ_TCB_BASE); + + anv->ctrl.sqsize = + APPLE_ANS_MAX_QUEUE_DEPTH - 1; /* 0's based queue depth */ + anv->ctrl.cap = readq(anv->mmio_nvme + NVME_REG_CAP); + + dev_dbg(anv->dev, "Enabling controller now"); + ret = nvme_enable_ctrl(&anv->ctrl); + if (ret) + goto out; + + dev_dbg(anv->dev, "Starting admin queue"); + apple_nvme_init_queue(&anv->adminq); + nvme_start_admin_queue(&anv->ctrl); + + if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(anv->ctrl.device, + "failed to mark controller CONNECTING\n"); + ret = -ENODEV; + goto out; + } + + ret = nvme_init_ctrl_finish(&anv->ctrl); + if (ret) + goto out; + + dev_dbg(anv->dev, "Creating IOCQ"); + ret = apple_nvme_create_cq(anv); + if (ret) + goto out; + dev_dbg(anv->dev, "Creating IOSQ"); + ret = apple_nvme_create_sq(anv); + if (ret) + goto out_remove_cq; + + apple_nvme_init_queue(&anv->ioq); + nr_io_queues = 1; + ret = nvme_set_queue_count(&anv->ctrl, &nr_io_queues); + if (ret) + goto out_remove_sq; + if (nr_io_queues != 1) { + ret = -ENXIO; + goto out_remove_sq; + } + + anv->ctrl.queue_count = nr_io_queues + 1; + + nvme_start_queues(&anv->ctrl); + nvme_wait_freeze(&anv->ctrl); + blk_mq_update_nr_hw_queues(&anv->tagset, 1); + nvme_unfreeze(&anv->ctrl); + + if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_LIVE)) { + dev_warn(anv->ctrl.device, + "failed to mark controller live state\n"); + ret = -ENODEV; + goto out_remove_sq; + } + + nvme_start_ctrl(&anv->ctrl); + + dev_dbg(anv->dev, "ANS boot and NVMe init completed."); + return; + +out_remove_sq: + apple_nvme_remove_sq(anv); +out_remove_cq: + apple_nvme_remove_cq(anv); +out: + dev_warn(anv->ctrl.device, "Reset failure status: %d\n", ret); + nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING); + nvme_get_ctrl(&anv->ctrl); + apple_nvme_disable(anv, false); + nvme_kill_queues(&anv->ctrl); + if (!queue_work(nvme_wq, &anv->remove_work)) + nvme_put_ctrl(&anv->ctrl); +} + +static void apple_nvme_remove_dead_ctrl_work(struct work_struct *work) +{ + struct apple_nvme *anv = + container_of(work, struct apple_nvme, remove_work); + + nvme_put_ctrl(&anv->ctrl); + device_release_driver(anv->dev); +} + +static int apple_nvme_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) +{ + *val = readl(ctrl_to_apple_nvme(ctrl)->mmio_nvme + off); + return 0; +} + +static int apple_nvme_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) +{ + writel(val, ctrl_to_apple_nvme(ctrl)->mmio_nvme + off); + return 0; +} + +static int apple_nvme_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) +{ + *val = readq(ctrl_to_apple_nvme(ctrl)->mmio_nvme + off); + return 0; +} + +static int apple_nvme_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct device *dev = ctrl_to_apple_nvme(ctrl)->dev; + + return snprintf(buf, size, "%s\n", dev_name(dev)); +} + +static void apple_nvme_free_ctrl(struct nvme_ctrl *ctrl) +{ + struct apple_nvme *anv = ctrl_to_apple_nvme(ctrl); + + if (anv->ctrl.admin_q) + blk_put_queue(anv->ctrl.admin_q); + put_device(anv->dev); +} + +static const struct nvme_ctrl_ops nvme_ctrl_ops = { + .name = "apple-nvme", + .module = THIS_MODULE, + .flags = 0, + .reg_read32 = apple_nvme_reg_read32, + .reg_write32 = apple_nvme_reg_write32, + .reg_read64 = apple_nvme_reg_read64, + .free_ctrl = apple_nvme_free_ctrl, + .get_address = apple_nvme_get_address, +}; + +static void apple_nvme_async_probe(void *data, async_cookie_t cookie) +{ + struct apple_nvme *anv = data; + + flush_work(&anv->ctrl.reset_work); + flush_work(&anv->ctrl.scan_work); + nvme_put_ctrl(&anv->ctrl); +} + +static int apple_nvme_alloc_tagsets(struct apple_nvme *anv) +{ + int ret; + + anv->admin_tagset.ops = &apple_nvme_mq_admin_ops; + anv->admin_tagset.nr_hw_queues = 1; + anv->admin_tagset.queue_depth = APPLE_NVME_AQ_MQ_TAG_DEPTH; + anv->admin_tagset.timeout = NVME_ADMIN_TIMEOUT; + anv->admin_tagset.numa_node = NUMA_NO_NODE; + anv->admin_tagset.cmd_size = sizeof(struct apple_nvme_iod); + anv->admin_tagset.flags = BLK_MQ_F_NO_SCHED; + anv->admin_tagset.driver_data = &anv->adminq; + + ret = blk_mq_alloc_tag_set(&anv->admin_tagset); + if (ret) + return ret; + ret = devm_add_action_or_reset(anv->dev, + (void (*)(void *))blk_mq_free_tag_set, + &anv->admin_tagset); + if (ret) + return ret; + + anv->tagset.ops = &apple_nvme_mq_ops; + anv->tagset.nr_hw_queues = 1; + anv->tagset.nr_maps = 1; + /* + * Tags are used as an index to the NVMMU and must be unique across + * both queues. The admin queue gets the first APPLE_NVME_AQ_DEPTH which + * must be marked as reserved in the IO queue. + */ + anv->tagset.reserved_tags = APPLE_NVME_AQ_DEPTH; + anv->tagset.queue_depth = APPLE_ANS_MAX_QUEUE_DEPTH - 1; + anv->tagset.timeout = NVME_IO_TIMEOUT; + anv->tagset.numa_node = NUMA_NO_NODE; + anv->tagset.cmd_size = sizeof(struct apple_nvme_iod); + anv->tagset.flags = BLK_MQ_F_SHOULD_MERGE; + anv->tagset.driver_data = &anv->ioq; + + ret = blk_mq_alloc_tag_set(&anv->tagset); + if (ret) + return ret; + ret = devm_add_action_or_reset( + anv->dev, (void (*)(void *))blk_mq_free_tag_set, &anv->tagset); + if (ret) + return ret; + + anv->ctrl.admin_tagset = &anv->admin_tagset; + anv->ctrl.tagset = &anv->tagset; + + return 0; +} + +static int apple_nvme_queue_alloc(struct apple_nvme *anv, + struct apple_nvme_queue *q) +{ + unsigned int depth = apple_nvme_queue_depth(q); + + q->cqes = dmam_alloc_coherent(anv->dev, + depth * sizeof(struct nvme_completion), + &q->cq_dma_addr, GFP_KERNEL); + if (!q->cqes) + return -ENOMEM; + + q->sqes = dmam_alloc_coherent(anv->dev, + depth * sizeof(struct nvme_command), + &q->sq_dma_addr, GFP_KERNEL); + if (!q->sqes) + return -ENOMEM; + + /* + * We need the maximum queue depth here because the NVMMU only has a + * single depth configuration shared between both queues. + */ + q->tcbs = dmam_alloc_coherent(anv->dev, + APPLE_ANS_MAX_QUEUE_DEPTH * + sizeof(struct apple_nvmmu_tcb), + &q->tcb_dma_addr, GFP_KERNEL); + if (!q->tcbs) + return -ENOMEM; + + /* + * initialize phase to make sure the allocated and empty memory + * doesn't look like a full cq already. + */ + q->cq_phase = 1; + return 0; +} + +static void apple_nvme_detach_genpd(struct apple_nvme *anv) +{ + int i; + + if (anv->pd_count <= 1) + return; + + for (i = anv->pd_count - 1; i >= 0; i--) { + if (anv->pd_link[i]) + device_link_del(anv->pd_link[i]); + if (!IS_ERR_OR_NULL(anv->pd_dev[i])) + dev_pm_domain_detach(anv->pd_dev[i], true); + } +} + +static int apple_nvme_attach_genpd(struct apple_nvme *anv) +{ + struct device *dev = anv->dev; + int i; + + anv->pd_count = of_count_phandle_with_args( + dev->of_node, "power-domains", "#power-domain-cells"); + if (anv->pd_count <= 1) + return 0; + + anv->pd_dev = devm_kcalloc(dev, anv->pd_count, sizeof(*anv->pd_dev), + GFP_KERNEL); + if (!anv->pd_dev) + return -ENOMEM; + + anv->pd_link = devm_kcalloc(dev, anv->pd_count, sizeof(*anv->pd_link), + GFP_KERNEL); + if (!anv->pd_link) + return -ENOMEM; + + for (i = 0; i < anv->pd_count; i++) { + anv->pd_dev[i] = dev_pm_domain_attach_by_id(dev, i); + if (IS_ERR(anv->pd_dev[i])) { + apple_nvme_detach_genpd(anv); + return PTR_ERR(anv->pd_dev[i]); + } + + anv->pd_link[i] = device_link_add(dev, anv->pd_dev[i], + DL_FLAG_STATELESS | + DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE); + if (!anv->pd_link[i]) { + apple_nvme_detach_genpd(anv); + return -EINVAL; + } + } + + return 0; +} + +static int apple_nvme_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct apple_nvme *anv; + int ret; + + anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL); + if (!anv) + return -ENOMEM; + + anv->dev = get_device(dev); + anv->adminq.is_adminq = true; + platform_set_drvdata(pdev, anv); + + ret = apple_nvme_attach_genpd(anv); + if (ret < 0) { + dev_err_probe(dev, ret, "Failed to attach power domains"); + goto put_dev; + } + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) { + ret = -ENXIO; + goto put_dev; + } + + anv->irq = platform_get_irq(pdev, 0); + if (anv->irq < 0) { + ret = anv->irq; + goto put_dev; + } + if (!anv->irq) { + ret = -ENXIO; + goto put_dev; + } + + anv->mmio_coproc = devm_platform_ioremap_resource_byname(pdev, "ans"); + if (IS_ERR(anv->mmio_coproc)) { + ret = PTR_ERR(anv->mmio_coproc); + goto put_dev; + } + anv->mmio_nvme = devm_platform_ioremap_resource_byname(pdev, "nvme"); + if (IS_ERR(anv->mmio_nvme)) { + ret = PTR_ERR(anv->mmio_nvme); + goto put_dev; + } + + anv->adminq.sq_db = anv->mmio_nvme + APPLE_ANS_LINEAR_ASQ_DB; + anv->adminq.cq_db = anv->mmio_nvme + APPLE_ANS_ACQ_DB; + anv->ioq.sq_db = anv->mmio_nvme + APPLE_ANS_LINEAR_IOSQ_DB; + anv->ioq.cq_db = anv->mmio_nvme + APPLE_ANS_IOCQ_DB; + + anv->sart = devm_apple_sart_get(dev); + if (IS_ERR(anv->sart)) { + ret = dev_err_probe(dev, PTR_ERR(anv->sart), + "Failed to initialize SART"); + goto put_dev; + } + + anv->reset = devm_reset_control_array_get_exclusive(anv->dev); + if (IS_ERR(anv->reset)) { + ret = dev_err_probe(dev, PTR_ERR(anv->reset), + "Failed to get reset control"); + goto put_dev; + } + + INIT_WORK(&anv->ctrl.reset_work, apple_nvme_reset_work); + INIT_WORK(&anv->remove_work, apple_nvme_remove_dead_ctrl_work); + spin_lock_init(&anv->lock); + + ret = apple_nvme_queue_alloc(anv, &anv->adminq); + if (ret) + goto put_dev; + ret = apple_nvme_queue_alloc(anv, &anv->ioq); + if (ret) + goto put_dev; + + anv->prp_page_pool = dmam_pool_create("prp list page", anv->dev, + NVME_CTRL_PAGE_SIZE, + NVME_CTRL_PAGE_SIZE, 0); + if (!anv->prp_page_pool) { + ret = -ENOMEM; + goto put_dev; + } + + anv->prp_small_pool = + dmam_pool_create("prp list 256", anv->dev, 256, 256, 0); + if (!anv->prp_small_pool) { + ret = -ENOMEM; + goto put_dev; + } + + WARN_ON_ONCE(apple_nvme_iod_alloc_size() > PAGE_SIZE); + anv->iod_mempool = + mempool_create_kmalloc_pool(1, apple_nvme_iod_alloc_size()); + if (!anv->iod_mempool) { + ret = -ENOMEM; + goto put_dev; + } + ret = devm_add_action_or_reset( + anv->dev, (void (*)(void *))mempool_destroy, anv->iod_mempool); + if (ret) + goto put_dev; + + ret = apple_nvme_alloc_tagsets(anv); + if (ret) + goto put_dev; + + ret = devm_request_irq(anv->dev, anv->irq, apple_nvme_irq, 0, + "nvme-apple", anv); + if (ret) { + dev_err_probe(dev, ret, "Failed to request IRQ"); + goto put_dev; + } + + anv->rtk = + devm_apple_rtkit_init(dev, anv, NULL, 0, &apple_nvme_rtkit_ops); + if (IS_ERR(anv->rtk)) { + ret = dev_err_probe(dev, PTR_ERR(anv->rtk), + "Failed to initialize RTKit"); + goto put_dev; + } + + ret = nvme_init_ctrl(&anv->ctrl, anv->dev, &nvme_ctrl_ops, + NVME_QUIRK_SKIP_CID_GEN); + if (ret) { + dev_err_probe(dev, ret, "Failed to initialize nvme_ctrl"); + goto put_dev; + } + + anv->ctrl.admin_q = blk_mq_init_queue(&anv->admin_tagset); + if (IS_ERR(anv->ctrl.admin_q)) { + ret = -ENOMEM; + goto put_dev; + } + + if (!blk_get_queue(anv->ctrl.admin_q)) { + nvme_start_admin_queue(&anv->ctrl); + blk_cleanup_queue(anv->ctrl.admin_q); + anv->ctrl.admin_q = NULL; + ret = -ENODEV; + goto put_dev; + } + + nvme_reset_ctrl(&anv->ctrl); + async_schedule(apple_nvme_async_probe, anv); + + return 0; + +put_dev: + put_device(anv->dev); + return ret; +} + +static int apple_nvme_remove(struct platform_device *pdev) +{ + struct apple_nvme *anv = platform_get_drvdata(pdev); + + nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_DELETING); + flush_work(&anv->ctrl.reset_work); + nvme_stop_ctrl(&anv->ctrl); + nvme_remove_namespaces(&anv->ctrl); + apple_nvme_disable(anv, true); + nvme_uninit_ctrl(&anv->ctrl); + + if (apple_rtkit_is_running(anv->rtk)) + apple_rtkit_shutdown(anv->rtk); + + apple_nvme_detach_genpd(anv); + + return 0; +} + +static void apple_nvme_shutdown(struct platform_device *pdev) +{ + struct apple_nvme *anv = platform_get_drvdata(pdev); + + apple_nvme_disable(anv, true); + if (apple_rtkit_is_running(anv->rtk)) + apple_rtkit_shutdown(anv->rtk); +} + +static int apple_nvme_resume(struct device *dev) +{ + struct apple_nvme *anv = dev_get_drvdata(dev); + + return nvme_reset_ctrl(&anv->ctrl); +} + +static int apple_nvme_suspend(struct device *dev) +{ + struct apple_nvme *anv = dev_get_drvdata(dev); + int ret = 0; + + apple_nvme_disable(anv, true); + + if (apple_rtkit_is_running(anv->rtk)) + ret = apple_rtkit_shutdown(anv->rtk); + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + + return ret; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(apple_nvme_pm_ops, apple_nvme_suspend, + apple_nvme_resume); + +static const struct of_device_id apple_nvme_of_match[] = { + { .compatible = "apple,nvme-ans2" }, + {}, +}; +MODULE_DEVICE_TABLE(of, apple_nvme_of_match); + +static struct platform_driver apple_nvme_driver = { + .driver = { + .name = "nvme-apple", + .of_match_table = apple_nvme_of_match, + .pm = pm_sleep_ptr(&apple_nvme_pm_ops), + }, + .probe = apple_nvme_probe, + .remove = apple_nvme_remove, + .shutdown = apple_nvme_shutdown, +}; +module_platform_driver(apple_nvme_driver); + +MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 7d49eb34b348..4910543f00ff 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -4,7 +4,6 @@ * Copyright (c) 2022, Oracle and/or its affiliates */ -#include <linux/blkdev.h> #include "nvme.h" #ifdef CONFIG_NVME_VERBOSE_ERRORS @@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", + [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready", [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", [NVME_SC_LBA_RANGE] = "LBA Out of Range", [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", @@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = { [NVME_SC_COMPARE_FAILED] = "Compare Failure", [NVME_SC_ACCESS_DENIED] = "Access Denied", [NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block", + [NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error", [NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss", [NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible", [NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition", + [NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error", [NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error", + [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; const unsigned char *nvme_get_error_status_str(u16 status) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index efb85c6d8e2d..24165daee3c8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -366,7 +366,7 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status != NVME_SC_SUCCESS)) + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) nvme_log_error(req); nvme_end_req_zoned(req); nvme_trace_bio_complete(req); @@ -1015,6 +1015,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } + req->rq_flags |= RQF_QUIET; ret = nvme_execute_rq(req, at_head); if (result && ret >= 0) *result = nvme_req(req)->result; @@ -1205,8 +1206,10 @@ static void nvme_keep_alive_work(struct work_struct *work) nvme_init_request(rq, &ctrl->ka_cmd); rq->timeout = ctrl->kato * HZ; + rq->end_io = nvme_keep_alive_end_io; rq->end_io_data = ctrl; - blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io); + rq->rq_flags |= RQF_QUIET; + blk_execute_rq_nowait(rq, false); } static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) @@ -1287,6 +1290,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_EUI64_LEN; memcpy(ids->eui64, data + sizeof(*cur), NVME_NIDT_EUI64_LEN); return NVME_NIDT_EUI64_LEN; case NVME_NIDT_NGUID: @@ -1295,6 +1300,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_NGUID_LEN; memcpy(ids->nguid, data + sizeof(*cur), NVME_NIDT_NGUID_LEN); return NVME_NIDT_NGUID_LEN; case NVME_NIDT_UUID: @@ -1303,6 +1310,8 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, warn_str, cur->nidl); return -1; } + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) + return NVME_NIDT_UUID_LEN; uuid_copy(&ids->uuid, data + sizeof(*cur)); return NVME_NIDT_UUID_LEN; case NVME_NIDT_CSI: @@ -1399,12 +1408,18 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); - if (ctrl->vs >= NVME_VS(1, 2, 0) && - !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + + if (ctrl->quirks & NVME_QUIRK_BOGUS_NID) { + dev_info(ctrl->device, + "Ignoring bogus Namespace Identifiers\n"); + } else { + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) + memcpy(ids->eui64, (*id)->eui64, sizeof(ids->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0) && + !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) + memcpy(ids->nguid, (*id)->nguid, sizeof(ids->nguid)); + } return 0; @@ -1413,6 +1428,32 @@ out_free_id: return error; } +static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid, + struct nvme_id_ns_cs_indep **id) +{ + struct nvme_command c = { + .identify.opcode = nvme_admin_identify, + .identify.nsid = cpu_to_le32(nsid), + .identify.cns = NVME_ID_CNS_NS_CS_INDEP, + }; + int ret; + + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); + if (ret) { + dev_warn(ctrl->device, + "Identify namespace (CS independent) failed (%d)\n", + ret); + kfree(*id); + return ret; + } + + return 0; +} + static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { @@ -1608,20 +1649,22 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) u32 size = queue_logical_block_size(queue); if (ctrl->max_discard_sectors == 0) { - blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); + blk_queue_max_discard_sectors(queue, 0); return; } BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; /* If discard is already enabled, don't reset queue limits */ - if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) + if (queue->limits.max_discard_sectors) return; + if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX)) + ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl); + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); @@ -1758,7 +1801,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); - blk_queue_dma_alignment(q, 7); + blk_queue_dma_alignment(q, 3); blk_queue_write_cache(q, vwc, vwc); } @@ -2087,10 +2130,9 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled) { - unsigned long timeout = - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies; u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; int ret; @@ -2103,7 +2145,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) usleep_range(1000, 2000); if (fatal_signal_pending(current)) return -EINTR; - if (time_after(jiffies, timeout)) { + if (time_after(jiffies, timeout_jiffies)) { dev_err(ctrl->device, "Device not ready; aborting %s, CSTS=0x%x\n", enabled ? "initialisation" : "reset", csts); @@ -2134,13 +2176,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) msleep(NVME_QUIRK_DELAY_AMOUNT); - return nvme_wait_ready(ctrl, ctrl->cap, false); + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false); } EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { unsigned dev_page_min; + u32 timeout; int ret; ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); @@ -2161,15 +2204,44 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ctrl->ctrl_config = NVME_CC_CSS_CSI; else ctrl->ctrl_config = NVME_CC_CSS_NVM; + + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) { + u32 crto; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto); + if (ret) { + dev_err(ctrl->device, "Reading CRTO failed (%d)\n", + ret); + return ret; + } + + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) { + ctrl->ctrl_config |= NVME_CC_CRIME; + timeout = NVME_CRTO_CRIMT(crto); + } else { + timeout = NVME_CRTO_CRWMT(crto); + } + } else { + timeout = NVME_CAP_TIMEOUT(ctrl->cap); + } + ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; - ctrl->ctrl_config |= NVME_CC_ENABLE; + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); + if (ret) + return ret; + /* Flush write to device (required if transport is PCI) */ + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CC, &ctrl->ctrl_config); + if (ret) + return ret; + + ctrl->ctrl_config |= NVME_CC_ENABLE; ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); if (ret) return ret; - return nvme_wait_ready(ctrl, ctrl->cap, true); + return nvme_wait_ready(ctrl, timeout, true); } EXPORT_SYMBOL_GPL(nvme_enable_ctrl); @@ -2881,8 +2953,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) if (id->dmrl) ctrl->max_discard_segments = id->dmrl; - if (id->dmrsl) - ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl); + ctrl->dmrsl = le32_to_cpu(id->dmrsl); if (id->wzsl) ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); @@ -3067,10 +3138,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_init_non_mdts_limits(ctrl); - if (ret < 0) - return ret; - ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -3133,6 +3200,7 @@ static const struct file_operations nvme_dev_fops = { .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_dev_uring_cmd, }; static ssize_t nvme_sysfs_reset(struct device *dev, @@ -3686,6 +3754,7 @@ static const struct file_operations nvme_ns_chr_fops = { .release = nvme_ns_chr_release, .unlocked_ioctl = nvme_ns_chr_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_ns_chr_uring_cmd, }; static int nvme_add_ns_cdev(struct nvme_ns *ns) @@ -4077,11 +4146,26 @@ out: static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_ids ids = { }; + struct nvme_id_ns_cs_indep *id; struct nvme_ns *ns; + bool ready = true; if (nvme_identify_ns_descs(ctrl, nsid, &ids)) return; + /* + * Check if the namespace is ready. If not ignore it, we will get an + * AEN once it becomes ready and restart the scan. + */ + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) && + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) { + ready = id->nstat & NVME_NSTAT_NRDY; + kfree(id); + } + + if (!ready) + return; + ns = nvme_find_get_ns(ctrl, nsid); if (ns) { nvme_validate_ns(ns, &ids); @@ -4224,11 +4308,26 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); + int ret; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; + /* + * Identify controller limits can change at controller reset due to + * new firmware download, even though it is not common we cannot ignore + * such scenario. Controller's non-mdts limits are reported in the unit + * of logical blocks that is dependent on the format of attached + * namespace. Hence re-read the limits at the time of ns allocation. + */ + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) { + dev_warn(ctrl->device, + "reading non-mdts-limits failed: %d\n", ret); + return; + } + if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); @@ -4826,6 +4925,8 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) != + NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 1e3a09cad961..46d6e194ac2b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) return ctrl->subsys->subnqn; } +static inline void nvmf_complete_timed_out_request(struct request *rq) +{ + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { + nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; + blk_mq_complete_request(rq); + } +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 05f9da251758..3c778bb0c294 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3849,6 +3849,9 @@ process_local_list: return count; } +static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); + +#ifdef CONFIG_BLK_CGROUP_FC_APPID /* Parse the cgroup id from a buf and return the length of cgrpid */ static int fc_parse_cgrpid(const char *buf, u64 *id) { @@ -3872,12 +3875,10 @@ static int fc_parse_cgrpid(const char *buf, u64 *id) } /* - * fc_update_appid: Parse and update the appid in the blkcg associated with - * cgroupid. - * @buf: buf contains both cgrpid and appid info - * @count: size of the buffer + * Parse and update the appid in the blkcg associated with the cgroupid. */ -static int fc_update_appid(const char *buf, size_t count) +static ssize_t fc_appid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { u64 cgrp_id; int appid_len = 0; @@ -3905,23 +3906,14 @@ static int fc_update_appid(const char *buf, size_t count) return ret; return count; } - -static ssize_t fc_appid_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - int ret = 0; - - ret = fc_update_appid(buf, count); - if (ret < 0) - return -EINVAL; - return count; -} -static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); static DEVICE_ATTR(appid_store, 0200, NULL, fc_appid_store); +#endif /* CONFIG_BLK_CGROUP_FC_APPID */ static struct attribute *nvme_fc_attrs[] = { &dev_attr_nvme_discovery.attr, +#ifdef CONFIG_BLK_CGROUP_FC_APPID &dev_attr_appid_store.attr, +#endif NULL }; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 554566371ffa..a2e89db1cd63 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -5,6 +5,7 @@ */ #include <linux/ptrace.h> /* for force_successful_syscall_return */ #include <linux/nvme_ioctl.h> +#include <linux/io_uring.h> #include "nvme.h" /* @@ -53,10 +54,21 @@ out: return ERR_PTR(ret); } -static int nvme_submit_user_cmd(struct request_queue *q, +static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, + void *meta, unsigned len, int ret) +{ + if (!ret && req_op(req) == REQ_OP_DRV_IN && + copy_to_user(ubuf, meta, len)) + ret = -EFAULT; + kfree(meta); + return ret; +} + +static struct request *nvme_alloc_user_request(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout, bool vec) + u32 meta_seed, void **metap, unsigned timeout, bool vec, + unsigned int rq_flags, blk_mq_req_flags_t blk_flags) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -66,9 +78,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = blk_mq_alloc_request(q, nvme_req_op(cmd), 0); + req = blk_mq_alloc_request(q, nvme_req_op(cmd) | rq_flags, blk_flags); if (IS_ERR(req)) - return PTR_ERR(req); + return req; nvme_init_request(req, cmd); if (timeout) @@ -105,26 +117,50 @@ static int nvme_submit_user_cmd(struct request_queue *q, goto out_unmap; } req->cmd_flags |= REQ_INTEGRITY; + *metap = meta; } } + return req; + +out_unmap: + if (bio) + blk_rq_unmap_user(bio); +out: + blk_mq_free_request(req); + return ERR_PTR(ret); +} + +static int nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u64 *result, unsigned timeout, bool vec) +{ + struct request *req; + void *meta = NULL; + struct bio *bio; + int ret; + + req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, + meta_len, meta_seed, &meta, timeout, vec, 0, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + + bio = req->bio; + ret = nvme_execute_passthru_rq(req); + if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); - if (meta && !ret && !write) { - if (copy_to_user(meta_buffer, meta, meta_len)) - ret = -EFAULT; - } - kfree(meta); - out_unmap: + if (meta) + ret = nvme_finish_user_metadata(req, meta_buffer, meta, + meta_len, ret); if (bio) blk_rq_unmap_user(bio); - out: blk_mq_free_request(req); return ret; } - static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_user_io io; @@ -296,6 +332,140 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return status; } +struct nvme_uring_data { + __u64 metadata; + __u64 addr; + __u32 data_len; + __u32 metadata_len; + __u32 timeout_ms; +}; + +/* + * This overlays struct io_uring_cmd pdu. + * Expect build errors if this grows larger than that. + */ +struct nvme_uring_cmd_pdu { + union { + struct bio *bio; + struct request *req; + }; + void *meta; /* kernel-resident buffer */ + void __user *meta_buffer; + u32 meta_len; +}; + +static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( + struct io_uring_cmd *ioucmd) +{ + return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; +} + +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +{ + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + struct request *req = pdu->req; + struct bio *bio = req->bio; + int status; + u64 result; + + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + status = -EINTR; + else + status = nvme_req(req)->status; + + result = le64_to_cpu(nvme_req(req)->result.u64); + + if (pdu->meta) + status = nvme_finish_user_metadata(req, pdu->meta_buffer, + pdu->meta, pdu->meta_len, status); + if (bio) + blk_rq_unmap_user(bio); + blk_mq_free_request(req); + + io_uring_cmd_done(ioucmd, status, result); +} + +static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) +{ + struct io_uring_cmd *ioucmd = req->end_io_data; + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + /* extract bio before reusing the same field for request */ + struct bio *bio = pdu->bio; + + pdu->req = req; + req->bio = bio; + /* this takes care of moving rest of completion-work to task context */ + io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); +} + +static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec) +{ + struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); + const struct nvme_uring_cmd *cmd = ioucmd->cmd; + struct request_queue *q = ns ? ns->queue : ctrl->admin_q; + struct nvme_uring_data d; + struct nvme_command c; + struct request *req; + unsigned int rq_flags = 0; + blk_mq_req_flags_t blk_flags = 0; + void *meta = NULL; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + c.common.opcode = READ_ONCE(cmd->opcode); + c.common.flags = READ_ONCE(cmd->flags); + if (c.common.flags) + return -EINVAL; + + c.common.command_id = 0; + c.common.nsid = cpu_to_le32(cmd->nsid); + if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid))) + return -EINVAL; + + c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2)); + c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3)); + c.common.metadata = 0; + c.common.dptr.prp1 = c.common.dptr.prp2 = 0; + c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10)); + c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11)); + c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12)); + c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13)); + c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); + c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); + + d.metadata = READ_ONCE(cmd->metadata); + d.addr = READ_ONCE(cmd->addr); + d.data_len = READ_ONCE(cmd->data_len); + d.metadata_len = READ_ONCE(cmd->metadata_len); + d.timeout_ms = READ_ONCE(cmd->timeout_ms); + + if (issue_flags & IO_URING_F_NONBLOCK) { + rq_flags = REQ_NOWAIT; + blk_flags = BLK_MQ_REQ_NOWAIT; + } + + req = nvme_alloc_user_request(q, &c, nvme_to_user_ptr(d.addr), + d.data_len, nvme_to_user_ptr(d.metadata), + d.metadata_len, 0, &meta, d.timeout_ms ? + msecs_to_jiffies(d.timeout_ms) : 0, vec, rq_flags, + blk_flags); + if (IS_ERR(req)) + return PTR_ERR(req); + req->end_io = nvme_uring_cmd_end_io; + req->end_io_data = ioucmd; + + /* to free bio on completion, as req->bio will be null at that time */ + pdu->bio = req->bio; + pdu->meta = meta; + pdu->meta_buffer = nvme_to_user_ptr(d.metadata); + pdu->meta_len = d.metadata_len; + + blk_execute_rq_nowait(req, false); + return -EIOCBQUEUED; +} + static bool is_ctrl_ioctl(unsigned int cmd) { if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) @@ -387,6 +557,53 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return __nvme_ioctl(ns, cmd, (void __user *)arg); } +static int nvme_uring_cmd_checks(unsigned int issue_flags) +{ + /* IOPOLL not supported yet */ + if (issue_flags & IO_URING_F_IOPOLL) + return -EOPNOTSUPP; + + /* NVMe passthrough requires big SQE/CQE support */ + if ((issue_flags & (IO_URING_F_SQE128|IO_URING_F_CQE32)) != + (IO_URING_F_SQE128|IO_URING_F_CQE32)) + return -EOPNOTSUPP; + return 0; +} + +static int nvme_ns_uring_cmd(struct nvme_ns *ns, struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + BUILD_BUG_ON(sizeof(struct nvme_uring_cmd_pdu) > sizeof(ioucmd->pdu)); + + ret = nvme_uring_cmd_checks(issue_flags); + if (ret) + return ret; + + switch (ioucmd->cmd_op) { + case NVME_URING_CMD_IO: + ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, false); + break; + case NVME_URING_CMD_IO_VEC: + ret = nvme_uring_cmd_io(ctrl, ns, ioucmd, issue_flags, true); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + +int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + struct nvme_ns *ns = container_of(file_inode(ioucmd->file)->i_cdev, + struct nvme_ns, cdev); + + return nvme_ns_uring_cmd(ns, ioucmd, issue_flags); +} + #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *argp, struct nvme_ns_head *head, int srcu_idx) @@ -453,8 +670,46 @@ out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; } + +int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + struct cdev *cdev = file_inode(ioucmd->file)->i_cdev; + struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); + int srcu_idx = srcu_read_lock(&head->srcu); + struct nvme_ns *ns = nvme_find_path(head); + int ret = -EINVAL; + + if (ns) + ret = nvme_ns_uring_cmd(ns, ioucmd, issue_flags); + srcu_read_unlock(&head->srcu, srcu_idx); + return ret; +} #endif /* CONFIG_NVME_MULTIPATH */ +int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) +{ + struct nvme_ctrl *ctrl = ioucmd->file->private_data; + int ret; + + ret = nvme_uring_cmd_checks(issue_flags); + if (ret) + return ret; + + switch (ioucmd->cmd_op) { + case NVME_URING_CMD_ADMIN: + ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, false); + break; + case NVME_URING_CMD_ADMIN_VEC: + ret = nvme_uring_cmd_io(ctrl, NULL, ioucmd, issue_flags, true); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d464fdf978fb..d3e2440d8abb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -437,6 +437,7 @@ static const struct file_operations nvme_ns_head_chr_fops = { .release = nvme_ns_head_chr_release, .unlocked_ioctl = nvme_ns_head_chr_ioctl, .compat_ioctl = compat_ptr_ioctl, + .uring_cmd = nvme_ns_head_chr_uring_cmd, }; static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1393bbf82d71..9b72b6ecf33c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -144,6 +144,11 @@ enum nvme_quirks { * encoding the generation sequence number. */ NVME_QUIRK_SKIP_CID_GEN = (1 << 17), + + /* + * Reports garbage in the namespace identifiers (eui64, nguid, uuid). + */ + NVME_QUIRK_BOGUS_NID = (1 << 18), }; /* @@ -279,6 +284,7 @@ struct nvme_ctrl { #endif u16 crdt[3]; u16 oncs; + u32 dmrsl; u16 oacs; u16 sqsize; u32 max_namespaces; @@ -777,7 +783,12 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags); +int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd, + unsigned int issue_flags); int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); +int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct pr_ops nvme_pr_ops; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d817ca17463e..48f4f6eb877b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1438,8 +1438,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) } nvme_init_request(abort_req, &cmd); + abort_req->end_io = abort_endio; abort_req->end_io_data = NULL; - blk_execute_rq_nowait(abort_req, false, abort_endio); + abort_req->rq_flags |= RQF_QUIET; + blk_execute_rq_nowait(abort_req, false); /* * The aborted req will be completed on receiving the abort req. @@ -1775,6 +1777,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { blk_mq_free_tag_set(&dev->admin_tagset); + dev->ctrl.admin_q = NULL; return -ENOMEM; } if (!blk_get_queue(dev->ctrl.admin_q)) { @@ -2483,11 +2486,15 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return PTR_ERR(req); nvme_init_request(req, &cmd); + if (opcode == nvme_admin_delete_cq) + req->end_io = nvme_del_cq_end; + else + req->end_io = nvme_del_queue_end; req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); - blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ? - nvme_del_cq_end : nvme_del_queue_end); + req->rq_flags |= RQF_QUIET; + blk_execute_rq_nowait(req, false); return 0; } @@ -2675,7 +2682,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) struct pci_dev *pdev = to_pci_dev(dev->dev); mutex_lock(&dev->shutdown_lock); - if (pci_is_enabled(pdev)) { + if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) { u32 csts = readl(dev->bar + NVME_REG_CSTS); if (dev->ctrl.state == NVME_CTRL_LIVE || @@ -3409,7 +3416,10 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS | - NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + NVME_QUIRK_DISABLE_WRITE_ZEROES | + NVME_QUIRK_BOGUS_NID, }, + { PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ @@ -3447,6 +3457,12 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d9f19d901313..f2a5e1ea508a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -867,8 +867,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); /* T10-PI support */ - if (ctrl->device->dev->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER) + if (ctrl->device->dev->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER) pi_capable = true; ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, @@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq) struct nvme_rdma_queue *queue = req->queue; nvme_rdma_stop_queue(queue); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index ad3a2bf2f1e9..bb67538d241b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { - nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; - blk_mq_complete_request(rq); - } + nvmf_complete_timed_out_request(rq); } static enum blk_eh_timer_return diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index d886c2c59554..27a72504d31c 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -360,7 +360,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, ret = __blkdev_issue_discard(ns->bdev, nvmet_lba_to_sect(ns, range->slba), le32_to_cpu(range->nlb) << (ns->blksize_shift - 9), - GFP_KERNEL, 0, bio); + GFP_KERNEL, bio); if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); return errno_to_nvme_status(req, ret); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 5247c24538eb..b1f7efab3918 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -97,7 +97,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) id->sgls |= cpu_to_le32(1 << 20); /* - * When passsthru controller is setup using nvme-loop transport it will + * When passthru controller is setup using nvme-loop transport it will * export the passthru ctrl subsysnqn (PCIe NVMe ctrl) and will fail in * the nvme/host/core.c in the nvme_init_subsystem()->nvme_active_ctrl() * code path with duplicate ctr subsynqn. In order to prevent that we @@ -285,8 +285,9 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) req->p.rq = rq; queue_work(nvmet_wq, &req->p.work); } else { + rq->end_io = nvmet_passthru_req_done; rq->end_io_data = req; - blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done); + blk_execute_rq_nowait(rq, false); } if (ns) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2fab0b219b25..09fdcac87d17 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1221,8 +1221,8 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; - if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { + if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER)) { pr_warn("T10-PI is not supported by device %s. Disabling it\n", cm_id->device->name); nport->pi_enable = false; diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index e34718b09550..82b61acf7a72 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct blk_zone *z, bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) { - struct request_queue *q = ns->bdev->bd_disk->queue; - u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); + u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev)); struct gendisk *bd_disk = ns->bdev->bd_disk; int ret; |