diff options
author | Jens Axboe <axboe@kernel.dk> | 2018-11-21 15:56:28 +0300 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2018-11-21 15:56:28 +0300 |
commit | 14b04063cc994effc86f976625bf8f806d8d44cb (patch) | |
tree | b250b7a408e8ffb1a51346fca54301d76aaebf22 | |
parent | 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a (diff) | |
parent | 4cff280a5fccf6513ed9e895bb3a4e7ad8b0cedc (diff) | |
download | linux-14b04063cc994effc86f976625bf8f806d8d44cb.tar.xz |
Merge branch 'nvme-4.20' of git://git.infradead.org/nvme into for-linus
Pull NVMe fix from Christoph.
* 'nvme-4.20' of git://git.infradead.org/nvme:
nvme-fc: resolve io failures during connect
-rw-r--r-- | drivers/nvme/host/fc.c | 73 |
1 files changed, 63 insertions, 10 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0b70c8bab045..54032c466636 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -152,6 +152,7 @@ struct nvme_fc_ctrl { bool ioq_live; bool assoc_active; + atomic_t err_work_active; u64 association_id; struct list_head ctrl_list; /* rport->ctrl_list */ @@ -160,6 +161,7 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct delayed_work connect_work; + struct work_struct err_work; struct kref ref; u32 flags; @@ -1531,6 +1533,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; int i; + /* ensure we've initialized the ops once */ + if (!(aen_op->flags & FCOP_FLAGS_AEN)) + return; + for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) __nvme_fc_abort_op(ctrl, aen_op); } @@ -2049,7 +2055,25 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { - /* only proceed if in LIVE state - e.g. on first error */ + int active; + + /* + * if an error (io timeout, etc) while (re)connecting, + * it's an error on creating the new association. + * Start the error recovery thread if it hasn't already + * been started. It is expected there could be multiple + * ios hitting this path before things are cleaned up. + */ + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + active = atomic_xchg(&ctrl->err_work_active, 1); + if (!active && !schedule_work(&ctrl->err_work)) { + atomic_set(&ctrl->err_work_active, 0); + WARN_ON(1); + } + return; + } + + /* Otherwise, only proceed if in LIVE state - e.g. on first error */ if (ctrl->ctrl.state != NVME_CTRL_LIVE) return; @@ -2814,6 +2838,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -2866,23 +2891,30 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) } static void -nvme_fc_reset_ctrl_work(struct work_struct *work) +__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) { - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - int ret; - - nvme_stop_ctrl(&ctrl->ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && + !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) dev_err(ctrl->ctrl.device, "NVME-FC{%d}: error_recovery: Couldn't change state " "to CONNECTING\n", ctrl->cnum); - return; - } +} + +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); + int ret; + + __nvme_fc_terminate_io(ctrl); + + nvme_stop_ctrl(&ctrl->ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) ret = nvme_fc_create_association(ctrl); @@ -2897,6 +2929,24 @@ nvme_fc_reset_ctrl_work(struct work_struct *work) ctrl->cnum); } +static void +nvme_fc_connect_err_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, err_work); + + __nvme_fc_terminate_io(ctrl); + + atomic_set(&ctrl->err_work_active, 0); + + /* + * Rescheduling the connection after recovering + * from the io error is left to the reconnect work + * item, which is what should have stalled waiting on + * the io that had the error that scheduled this work. + */ +} + static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", .module = THIS_MODULE, @@ -3007,6 +3057,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->cnum = idx; ctrl->ioq_live = false; ctrl->assoc_active = false; + atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); @@ -3014,6 +3065,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3103,6 +3155,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); ctrl->ctrl.opts = NULL; |