diff options
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- | drivers/nvme/host/pci.c | 380 |
1 files changed, 166 insertions, 214 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a128672472ec..f8db70ae172d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -27,7 +27,6 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kdev_t.h> -#include <linux/kthread.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/module.h> @@ -39,6 +38,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/t10-pi.h> +#include <linux/timer.h> #include <linux/types.h> #include <linux/io-64-nonatomic-lo-hi.h> #include <asm/unaligned.h> @@ -57,18 +57,6 @@ #define NVME_NR_AEN_COMMANDS 1 #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) -unsigned char admin_timeout = 60; -module_param(admin_timeout, byte, 0644); -MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); - -unsigned char nvme_io_timeout = 30; -module_param_named(io_timeout, nvme_io_timeout, byte, 0644); -MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); - -unsigned char shutdown_timeout = 5; -module_param(shutdown_timeout, byte, 0644); -MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); - static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -76,24 +64,19 @@ static bool use_cmb_sqes = true; module_param(use_cmb_sqes, bool, 0644); MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); -static LIST_HEAD(dev_list); -static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; -static wait_queue_head_t nvme_kthread_wait; struct nvme_dev; struct nvme_queue; static int nvme_reset(struct nvme_dev *dev); static void nvme_process_cq(struct nvme_queue *nvmeq); -static void nvme_remove_dead_ctrl(struct nvme_dev *dev); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. */ struct nvme_dev { - struct list_head node; struct nvme_queue **queues; struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; @@ -111,6 +94,8 @@ struct nvme_dev { struct work_struct reset_work; struct work_struct scan_work; struct work_struct remove_work; + struct work_struct async_work; + struct timer_list watchdog_timer; struct mutex shutdown_lock; bool subsystem; void __iomem *cmb; @@ -120,6 +105,7 @@ struct nvme_dev { unsigned long flags; #define NVME_CTRL_RESETTING 0 +#define NVME_CTRL_REMOVING 1 struct nvme_ctrl ctrl; struct completion ioq_wait; @@ -148,7 +134,6 @@ struct nvme_queue { u32 __iomem *q_db; u16 q_depth; s16 cq_vector; - u16 sq_head; u16 sq_tail; u16 cq_head; u16 qid; @@ -286,23 +271,37 @@ static int nvme_init_request(void *data, struct request *req, return 0; } +static void nvme_queue_scan(struct nvme_dev *dev) +{ + /* + * Do not queue new scan work when a controller is reset during + * removal. + */ + if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) + return; + queue_work(nvme_workq, &dev->scan_work); +} + static void nvme_complete_async_event(struct nvme_dev *dev, struct nvme_completion *cqe) { u16 status = le16_to_cpu(cqe->status) >> 1; u32 result = le32_to_cpu(cqe->result); - if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) + if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { ++dev->ctrl.event_limit; + queue_work(nvme_workq, &dev->async_work); + } + if (status != NVME_SC_SUCCESS) return; switch (result & 0xff07) { case NVME_AER_NOTICE_NS_CHANGED: - dev_info(dev->dev, "rescanning\n"); - queue_work(nvme_workq, &dev->scan_work); + dev_info(dev->ctrl.device, "rescanning\n"); + nvme_queue_scan(dev); default: - dev_warn(dev->dev, "async event result %08x\n", result); + dev_warn(dev->ctrl.device, "async event result %08x\n", result); } } @@ -679,7 +678,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_MQ_RQ_QUEUE_BUSY; + if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) + ret = BLK_MQ_RQ_QUEUE_BUSY; + else + ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); goto out; } @@ -713,7 +715,7 @@ static void nvme_complete_rq(struct request *req) } if (unlikely(iod->aborted)) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "completing aborted command with status: %04x\n", req->errors); } @@ -735,7 +737,6 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) if ((status & 1) != phase) break; - nvmeq->sq_head = le16_to_cpu(cqe.sq_head); if (++head == nvmeq->q_depth) { head = 0; phase = !phase; @@ -745,7 +746,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) *tag = -1; if (unlikely(cqe.command_id >= nvmeq->q_depth)) { - dev_warn(nvmeq->q_dmadev, + dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", cqe.command_id, le16_to_cpu(cqe.sq_id)); continue; @@ -764,10 +765,8 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - u32 result = le32_to_cpu(cqe.result); - req->special = (void *)(uintptr_t)result; - } + if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) + memcpy(req->special, &cqe, sizeof(cqe)); blk_mq_complete_request(req, status >> 1); } @@ -832,15 +831,22 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } -static void nvme_submit_async_event(struct nvme_dev *dev) +static void nvme_async_event_work(struct work_struct *work) { + struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work); + struct nvme_queue *nvmeq = dev->queues[0]; struct nvme_command c; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_admin_async_event; - c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit; - __nvme_submit_cmd(dev->queues[0], &c); + spin_lock_irq(&nvmeq->q_lock); + while (dev->ctrl.event_limit > 0) { + c.common.command_id = NVME_AQ_BLKMQ_DEPTH + + --dev->ctrl.event_limit; + __nvme_submit_cmd(nvmeq, &c); + } + spin_unlock_irq(&nvmeq->q_lock); } static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) @@ -910,12 +916,10 @@ static void abort_endio(struct request *req, int error) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; - u32 result = (u32)(uintptr_t)req->special; u16 status = req->errors; - dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); + dev_warn(nvmeq->dev->ctrl.device, "Abort status: 0x%x", status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); - blk_mq_free_request(req); } @@ -934,7 +938,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * shutdown, so we return BLK_EH_HANDLED. */ if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); @@ -948,7 +952,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * returned to the driver, or if this is the admin queue. */ if (!nvmeq->qid || iod->aborted) { - dev_warn(dev->dev, + dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); @@ -974,8 +978,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) cmd.abort.cid = req->tag; cmd.abort.sqid = cpu_to_le16(nvmeq->qid); - dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n", - req->tag, nvmeq->qid); + dev_warn(nvmeq->dev->ctrl.device, + "I/O %d QID %d timeout, aborting\n", + req->tag, nvmeq->qid); abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, BLK_MQ_REQ_NOWAIT); @@ -1004,7 +1009,7 @@ static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved if (!blk_mq_request_started(req)) return; - dev_dbg_ratelimited(nvmeq->q_dmadev, + dev_dbg_ratelimited(nvmeq->dev->ctrl.device, "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid); status = NVME_SC_ABORT_REQ; @@ -1159,9 +1164,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->qid = qid; nvmeq->cq_vector = -1; dev->queues[qid] = nvmeq; - - /* make sure queue descriptor is set before queue count, for kthread */ - mb(); dev->queue_count++; return nvmeq; @@ -1250,6 +1252,12 @@ static struct blk_mq_ops nvme_mq_ops = { static void nvme_dev_remove_admin(struct nvme_dev *dev) { if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { + /* + * If the controller was reset during removal, it's possible + * user requests may be waiting on a stopped queue. Start the + * queue to flush these to completion. + */ + blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1340,53 +1348,31 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) return result; } -static int nvme_kthread(void *data) -{ - struct nvme_dev *dev, *next; - - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - spin_lock(&dev_list_lock); - list_for_each_entry_safe(dev, next, &dev_list, node) { - int i; - u32 csts = readl(dev->bar + NVME_REG_CSTS); - - /* - * Skip controllers currently under reset. - */ - if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work)) - continue; - - if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || - csts & NVME_CSTS_CFS) { - if (queue_work(nvme_workq, &dev->reset_work)) { - dev_warn(dev->dev, - "Failed status: %x, reset controller\n", - readl(dev->bar + NVME_REG_CSTS)); - } - continue; - } - for (i = 0; i < dev->queue_count; i++) { - struct nvme_queue *nvmeq = dev->queues[i]; - if (!nvmeq) - continue; - spin_lock_irq(&nvmeq->q_lock); - nvme_process_cq(nvmeq); - - while (i == 0 && dev->ctrl.event_limit > 0) - nvme_submit_async_event(dev); - spin_unlock_irq(&nvmeq->q_lock); - } +static void nvme_watchdog_timer(unsigned long data) +{ + struct nvme_dev *dev = (struct nvme_dev *)data; + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + /* + * Skip controllers currently under reset. + */ + if (!work_pending(&dev->reset_work) && !work_busy(&dev->reset_work) && + ((csts & NVME_CSTS_CFS) || + (dev->subsystem && (csts & NVME_CSTS_NSSRO)))) { + if (queue_work(nvme_workq, &dev->reset_work)) { + dev_warn(dev->dev, + "Failed status: 0x%x, reset controller.\n", + csts); } - spin_unlock(&dev_list_lock); - schedule_timeout(round_jiffies_relative(HZ)); + return; } - return 0; + + mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ)); } static int nvme_create_io_queues(struct nvme_dev *dev) { - unsigned i; + unsigned i, max; int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { @@ -1396,7 +1382,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) } } - for (i = dev->online_queues; i <= dev->queue_count - 1; i++) { + max = min(dev->max_qid, dev->queue_count - 1); + for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); if (ret) { nvme_free_queues(dev, i); @@ -1487,7 +1474,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * access to the admin queue, as that might be only way to fix them up. */ if (result > 0) { - dev_err(dev->dev, "Could not set queue count (%d)\n", result); + dev_err(dev->ctrl.device, + "Could not set queue count (%d)\n", result); nr_io_queues = 0; result = 0; } @@ -1553,9 +1541,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) adminq->cq_vector = -1; goto free_queues; } - - /* Free previously allocated queues that are no longer usable */ - nvme_free_queues(dev, nr_io_queues + 1); return nvme_create_io_queues(dev); free_queues: @@ -1689,15 +1674,21 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + } else { + blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); + + /* Free previously allocated queues that are no longer usable */ + nvme_free_queues(dev, dev->online_queues); } - queue_work(nvme_workq, &dev->scan_work); + + nvme_queue_scan(dev); return 0; } -static int nvme_dev_map(struct nvme_dev *dev) +static int nvme_pci_enable(struct nvme_dev *dev) { u64 cap; - int bars, result = -ENOMEM; + int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); if (pci_enable_device_mem(pdev)) @@ -1705,24 +1696,14 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->entry[0].vector = pdev->irq; pci_set_master(pdev); - bars = pci_select_bars(pdev, IORESOURCE_MEM); - if (!bars) - goto disable_pci; - - if (pci_request_selected_regions(pdev, bars, "nvme")) - goto disable_pci; if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; - dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); - if (!dev->bar) - goto disable; - if (readl(dev->bar + NVME_REG_CSTS) == -1) { result = -ENODEV; - goto unmap; + goto disable; } /* @@ -1732,7 +1713,7 @@ static int nvme_dev_map(struct nvme_dev *dev) if (!pdev->irq) { result = pci_enable_msix(pdev, dev->entry, 1); if (result < 0) - goto unmap; + goto disable; } cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -1759,18 +1740,20 @@ static int nvme_dev_map(struct nvme_dev *dev) pci_save_state(pdev); return 0; - unmap: - iounmap(dev->bar); - dev->bar = NULL; disable: - pci_release_regions(pdev); - disable_pci: pci_disable_device(pdev); return result; } static void nvme_dev_unmap(struct nvme_dev *dev) { + if (dev->bar) + iounmap(dev->bar); + pci_release_regions(to_pci_dev(dev->dev)); +} + +static void nvme_pci_disable(struct nvme_dev *dev) +{ struct pci_dev *pdev = to_pci_dev(dev->dev); if (pdev->msi_enabled) @@ -1778,71 +1761,21 @@ static void nvme_dev_unmap(struct nvme_dev *dev) else if (pdev->msix_enabled) pci_disable_msix(pdev); - if (dev->bar) { - iounmap(dev->bar); - dev->bar = NULL; - pci_release_regions(pdev); - } - if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } } -static int nvme_dev_list_add(struct nvme_dev *dev) -{ - bool start_thread = false; - - spin_lock(&dev_list_lock); - if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { - start_thread = true; - nvme_thread = NULL; - } - list_add(&dev->node, &dev_list); - spin_unlock(&dev_list_lock); - - if (start_thread) { - nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); - wake_up_all(&nvme_kthread_wait); - } else - wait_event_killable(nvme_kthread_wait, nvme_thread); - - if (IS_ERR_OR_NULL(nvme_thread)) - return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR; - - return 0; -} - -/* -* Remove the node from the device list and check -* for whether or not we need to stop the nvme_thread. -*/ -static void nvme_dev_list_remove(struct nvme_dev *dev) -{ - struct task_struct *tmp = NULL; - - spin_lock(&dev_list_lock); - list_del_init(&dev->node); - if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) { - tmp = nvme_thread; - nvme_thread = NULL; - } - spin_unlock(&dev_list_lock); - - if (tmp) - kthread_stop(tmp); -} - static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i; u32 csts = -1; - nvme_dev_list_remove(dev); + del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); - if (dev->bar) { + if (pci_is_enabled(to_pci_dev(dev->dev))) { nvme_stop_queues(&dev->ctrl); csts = readl(dev->bar + NVME_REG_CSTS); } @@ -1855,7 +1788,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } - nvme_dev_unmap(dev); + nvme_pci_disable(dev); for (i = dev->queue_count - 1; i >= 0; i--) nvme_clear_queue(dev->queues[i]); @@ -1899,10 +1832,20 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) kfree(dev); } +static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) +{ + dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status); + + kref_get(&dev->ctrl.kref); + nvme_dev_disable(dev, false); + if (!schedule_work(&dev->remove_work)) + nvme_put_ctrl(&dev->ctrl); +} + static void nvme_reset_work(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); - int result; + int result = -ENODEV; if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) goto out; @@ -1911,44 +1854,43 @@ static void nvme_reset_work(struct work_struct *work) * If we're called to reset a live controller first shut it down before * moving on. */ - if (dev->bar) + if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); set_bit(NVME_CTRL_RESETTING, &dev->flags); - result = nvme_dev_map(dev); + result = nvme_pci_enable(dev); if (result) goto out; result = nvme_configure_admin_queue(dev); if (result) - goto unmap; + goto out; nvme_init_queue(dev->queues[0], 0); result = nvme_alloc_admin_tags(dev); if (result) - goto disable; + goto out; result = nvme_init_identify(&dev->ctrl); if (result) - goto free_tags; + goto out; result = nvme_setup_io_queues(dev); if (result) - goto free_tags; + goto out; dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; + queue_work(nvme_workq, &dev->async_work); - result = nvme_dev_list_add(dev); - if (result) - goto remove; + mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ)); /* * Keep the controller around but remove all namespaces if we don't have * any working I/O queue. */ if (dev->online_queues < 2) { - dev_warn(dev->dev, "IO queues not created\n"); + dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); @@ -1958,19 +1900,8 @@ static void nvme_reset_work(struct work_struct *work) clear_bit(NVME_CTRL_RESETTING, &dev->flags); return; - remove: - nvme_dev_list_remove(dev); - free_tags: - nvme_dev_remove_admin(dev); - blk_put_queue(dev->ctrl.admin_q); - dev->ctrl.admin_q = NULL; - dev->queues[0]->tags = NULL; - disable: - nvme_disable_admin_queue(dev, false); - unmap: - nvme_dev_unmap(dev); out: - nvme_remove_dead_ctrl(dev); + nvme_remove_dead_ctrl(dev, result); } static void nvme_remove_dead_ctrl_work(struct work_struct *work) @@ -1978,19 +1909,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); nvme_put_ctrl(&dev->ctrl); } -static void nvme_remove_dead_ctrl(struct nvme_dev *dev) -{ - dev_warn(dev->dev, "Removing after probe failure\n"); - kref_get(&dev->ctrl.kref); - if (!schedule_work(&dev->remove_work)) - nvme_put_ctrl(&dev->ctrl); -} - static int nvme_reset(struct nvme_dev *dev) { if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) @@ -2034,6 +1958,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { + .module = THIS_MODULE, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -2042,6 +1967,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .free_ctrl = nvme_pci_free_ctrl, }; +static int nvme_dev_map(struct nvme_dev *dev) +{ + int bars; + struct pci_dev *pdev = to_pci_dev(dev->dev); + + bars = pci_select_bars(pdev, IORESOURCE_MEM); + if (!bars) + return -ENODEV; + if (pci_request_selected_regions(pdev, bars, "nvme")) + return -ENODEV; + + dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); + if (!dev->bar) + goto release; + + return 0; + release: + pci_release_regions(pdev); + return -ENODEV; +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -2066,10 +2012,16 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); - INIT_LIST_HEAD(&dev->node); + result = nvme_dev_map(dev); + if (result) + goto free; + INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->reset_work, nvme_reset_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); + INIT_WORK(&dev->async_work, nvme_async_event_work); + setup_timer(&dev->watchdog_timer, nvme_watchdog_timer, + (unsigned long)dev); mutex_init(&dev->shutdown_lock); init_completion(&dev->ioq_wait); @@ -2082,6 +2034,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); + queue_work(nvme_workq, &dev->reset_work); return 0; @@ -2089,6 +2043,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvme_release_prp_pools(dev); put_pci: put_device(dev->dev); + nvme_dev_unmap(dev); free: kfree(dev->queues); kfree(dev->entry); @@ -2112,11 +2067,20 @@ static void nvme_shutdown(struct pci_dev *pdev) nvme_dev_disable(dev, true); } +/* + * The driver's remove may be called on a device in a partially initialized + * state. This function must not have any dependencies on the device state in + * order to proceed. + */ static void nvme_remove(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); + del_timer_sync(&dev->watchdog_timer); + + set_bit(NVME_CTRL_REMOVING, &dev->flags); pci_set_drvdata(pdev, NULL); + flush_work(&dev->async_work); flush_work(&dev->scan_work); nvme_remove_namespaces(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl); @@ -2126,6 +2090,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_free_queues(dev, 0); nvme_release_cmb(dev); nvme_release_prp_pools(dev); + nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); } @@ -2161,7 +2126,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, * shutdown the controller to quiesce. The controller will be restarted * after the slot reset through driver's slot_reset callback. */ - dev_warn(&pdev->dev, "error detected: state:%d\n", state); + dev_warn(dev->ctrl.device, "error detected: state:%d\n", state); switch (state) { case pci_channel_io_normal: return PCI_ERS_RESULT_CAN_RECOVER; @@ -2178,7 +2143,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - dev_info(&pdev->dev, "restart after slot reset\n"); + dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); queue_work(nvme_workq, &dev->reset_work); return PCI_ERS_RESULT_RECOVERED; @@ -2201,7 +2166,8 @@ static const struct pci_error_handlers nvme_err_handler = { static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0953), - .driver_data = NVME_QUIRK_STRIPE_SIZE, }, + .driver_data = NVME_QUIRK_STRIPE_SIZE | + NVME_QUIRK_DISCARD_ZEROES, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, @@ -2226,34 +2192,20 @@ static int __init nvme_init(void) { int result; - init_waitqueue_head(&nvme_kthread_wait); - nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); if (!nvme_workq) return -ENOMEM; - result = nvme_core_init(); - if (result < 0) - goto kill_workq; - result = pci_register_driver(&nvme_driver); if (result) - goto core_exit; - return 0; - - core_exit: - nvme_core_exit(); - kill_workq: - destroy_workqueue(nvme_workq); + destroy_workqueue(nvme_workq); return result; } static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); - nvme_core_exit(); destroy_workqueue(nvme_workq); - BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); _nvme_check_size(); } |