From 69d9a99c258eb1d6478fd9608a2070890797eed7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Feb 2016 09:15:56 -0700 Subject: NVMe: Move error handling to failed reset handler This moves failed queue handling out of the namespace removal path and into the reset failure path, fixing a hanging condition if the controller fails or the link goes down during del_gendisk. Previously the driver had to see the controller as degraded prior to calling del_gendisk to set up the queues to fail. But, if the controller happened to fail after this, there was no task to end outstanding requests. On failure, all namespace states are set to dead. This causes capacity to revalidate to 0 and ends all new requests with an error status. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/host/pci.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6d2e4257308b..680f5780750c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -690,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&nvmeq->q_lock); if (unlikely(nvmeq->cq_vector < 0)) { - ret = BLK_MQ_RQ_QUEUE_BUSY; + if (ns && !test_bit(NVME_NS_DEAD, &ns->flags)) + ret = BLK_MQ_RQ_QUEUE_BUSY; + else + ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); goto out; } @@ -1261,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = { static void nvme_dev_remove_admin(struct nvme_dev *dev) { if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { + /* + * If the controller was reset during removal, it's possible + * user requests may be waiting on a stopped queue. Start the + * queue to flush these to completion. 
+ */ + blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); blk_cleanup_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } @@ -1901,6 +1910,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) dev_warn(dev->dev, "Removing after probe failure status: %d\n", status); kref_get(&dev->ctrl.kref); + nvme_dev_disable(dev, false); if (!schedule_work(&dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } @@ -1973,6 +1983,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) pci_stop_and_remove_bus_device_locked(pdev); nvme_put_ctrl(&dev->ctrl); -- cgit v1.2.3