diff options
Diffstat (limited to 'drivers/nvme/host/rdma.c')
-rw-r--r-- | drivers/nvme/host/rdma.c | 234 |
1 files changed, 111 insertions, 123 deletions
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 66ec5985c9f3..0805fa6215ee 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -40,13 +40,14 @@ #define NVME_RDMA_MAX_SEGMENTS 256 -#define NVME_RDMA_MAX_INLINE_SEGMENTS 1 +#define NVME_RDMA_MAX_INLINE_SEGMENTS 4 struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; struct kref ref; struct list_head entry; + unsigned int num_inline_segments; }; struct nvme_rdma_qe { @@ -117,6 +118,7 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; + bool use_inline_data; }; static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl) @@ -249,7 +251,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) /* +1 for drain */ init_attr.cap.max_recv_wr = queue->queue_size + 1; init_attr.cap.max_recv_sge = 1; - init_attr.cap.max_send_sge = 1 + NVME_RDMA_MAX_INLINE_SEGMENTS; + init_attr.cap.max_send_sge = 1 + dev->num_inline_segments; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = queue->ib_cq; @@ -286,6 +288,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct ib_device *ibdev = dev->dev; int ret; + nvme_req(rq)->ctrl = &ctrl->ctrl; ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (ret) @@ -374,6 +377,8 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) goto out_free_pd; } + ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS, + ndev->dev->attrs.max_sge - 1); list_add(&ndev->entry, &device_list); out_unlock: mutex_unlock(&device_list_mutex); @@ -868,6 +873,31 @@ out_free_io_queues: return ret; } +static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool remove) +{ + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, + &ctrl->ctrl); + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_destroy_admin_queue(ctrl, remove); +} + +static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, + bool remove) +{ + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, + &ctrl->ctrl); + if (remove) + nvme_start_queues(&ctrl->ctrl); + nvme_rdma_destroy_io_queues(ctrl, remove); + } +} + static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -912,21 +942,44 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) } } -static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) +static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) { - struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), - struct nvme_rdma_ctrl, reconnect_work); + int ret = -EINVAL; bool changed; - int ret; - ++ctrl->ctrl.nr_reconnects; - - ret = nvme_rdma_configure_admin_queue(ctrl, false); + ret = nvme_rdma_configure_admin_queue(ctrl, new); if (ret) - goto requeue; + return ret; + + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); + goto destroy_admin; + } + + if (!(ctrl->ctrl.sgls & (1 << 2))) { + dev_err(ctrl->ctrl.device, + "Mandatory keyed sgls are not supported!\n"); + goto destroy_admin; + } + + if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) { + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl sqsize %u, clamping down\n", + ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); + } + + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { + dev_warn(ctrl->ctrl.device, + "sqsize %u > ctrl maxcmd %u, clamping down\n", + ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); + ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; + } + + if (ctrl->ctrl.sgls & (1 << 20)) + ctrl->use_inline_data = true; if (ctrl->ctrl.queue_count > 1) { - ret = nvme_rdma_configure_io_queues(ctrl, false); + ret = nvme_rdma_configure_io_queues(ctrl, new); if (ret) goto destroy_admin; } @@ -935,10 +988,31 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (!changed) { /* state change failure is ok if we're in DELETING state */ WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); - return; + ret = -EINVAL; + goto destroy_io; } nvme_start_ctrl(&ctrl->ctrl); + return 0; + +destroy_io: + if (ctrl->ctrl.queue_count > 1) + nvme_rdma_destroy_io_queues(ctrl, new); +destroy_admin: + nvme_rdma_stop_queue(&ctrl->queues[0]); + nvme_rdma_destroy_admin_queue(ctrl, new); + return ret; +} + +static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) +{ + struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), + struct nvme_rdma_ctrl, reconnect_work); + + ++ctrl->ctrl.nr_reconnects; + + if (nvme_rdma_setup_ctrl(ctrl, false)) + goto requeue; dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", ctrl->ctrl.nr_reconnects); @@ -947,9 +1021,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) return; -destroy_admin: - nvme_rdma_stop_queue(&ctrl->queues[0]); - nvme_rdma_destroy_admin_queue(ctrl, false); requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", ctrl->ctrl.nr_reconnects); @@ -962,27 +1033,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); nvme_stop_keep_alive(&ctrl->ctrl); - - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - nvme_rdma_stop_io_queues(ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, false); - } - - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, false); - - /* - * queues are not a live anymore, so restart the queues to fail fast - * new IO - */ - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); + nvme_rdma_teardown_admin_queue(ctrl, false); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { /* state change failure is ok if we're in DELETING state */ @@ -1090,19 +1143,27 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c) } static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, - struct nvme_rdma_request *req, struct nvme_command *c) + struct nvme_rdma_request *req, struct nvme_command *c, + int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; + struct scatterlist *sgl = req->sg_table.sgl; + struct ib_sge *sge = &req->sge[1]; + u32 len = 0; + int i; - req->sge[1].addr = sg_dma_address(req->sg_table.sgl); - req->sge[1].length = sg_dma_len(req->sg_table.sgl); - req->sge[1].lkey = queue->device->pd->local_dma_lkey; + for (i = 0; i < count; i++, sgl++, sge++) { + sge->addr = sg_dma_address(sgl); + sge->length = sg_dma_len(sgl); + sge->lkey = queue->device->pd->local_dma_lkey; + len += sge->length; + } sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); - sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl)); + sg->length = cpu_to_le32(len); sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; - req->num_sge++; + req->num_sge += count; return 0; } @@ -1195,15 +1256,16 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, goto out_free_table; } - if (count == 1) { + if (count <= dev->num_inline_segments) { if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && + queue->ctrl->use_inline_data && blk_rq_payload_bytes(rq) <= nvme_rdma_inline_data_size(queue)) { - ret = nvme_rdma_map_sg_inline(queue, req, c); + ret = nvme_rdma_map_sg_inline(queue, req, c, count); goto out; } - if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { ret = nvme_rdma_map_sg_single(queue, req, c); goto out; } @@ -1574,6 +1636,7 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: nvme_rdma_destroy_queue_ib(queue); + /* fall through */ case RDMA_CM_EVENT_ADDR_ERROR: dev_dbg(queue->ctrl->ctrl.device, "CM error event %d\n", ev->event); @@ -1736,25 +1799,12 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - nvme_rdma_stop_io_queues(ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, shutdown); - } - + nvme_rdma_teardown_io_queues(ctrl, shutdown); if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); else nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); - - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_cancel_request, &ctrl->ctrl); - blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_destroy_admin_queue(ctrl, shutdown); + nvme_rdma_teardown_admin_queue(ctrl, shutdown); } static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) @@ -1766,8 +1816,6 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); - int ret; - bool changed; nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); @@ -1778,25 +1826,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; } - ret = nvme_rdma_configure_admin_queue(ctrl, false); - if (ret) + if (nvme_rdma_setup_ctrl(ctrl, false)) goto out_fail; - if (ctrl->ctrl.queue_count > 1) { - ret = nvme_rdma_configure_io_queues(ctrl, false); - if (ret) - goto out_fail; - } - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - if (!changed) { - /* state change failure is ok if we're in DELETING state */ - WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); - return; - } - - nvme_start_ctrl(&ctrl->ctrl); - return; out_fail: @@ -1959,49 +1991,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); WARN_ON_ONCE(!changed); - ret = nvme_rdma_configure_admin_queue(ctrl, true); + ret = nvme_rdma_setup_ctrl(ctrl, true); if (ret) goto out_uninit_ctrl; - /* sanity check icdoff */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); - ret = -EINVAL; - goto out_remove_admin_queue; - } - - /* sanity check keyed sgls */ - if (!(ctrl->ctrl.sgls & (1 << 2))) { - dev_err(ctrl->ctrl.device, - "Mandatory keyed sgls are not supported!\n"); - ret = -EINVAL; - goto out_remove_admin_queue; - } - - /* only warn if argument is too large here, will clamp later */ - if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, clamping down\n", - opts->queue_size, ctrl->ctrl.sqsize + 1); - } - - /* warn if maxcmd is lower than sqsize+1 */ - if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { - dev_warn(ctrl->ctrl.device, - "sqsize %u > ctrl maxcmd %u, clamping down\n", - ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); - ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; - } - - if (opts->nr_io_queues) { - ret = nvme_rdma_configure_io_queues(ctrl, true); - if (ret) - goto out_remove_admin_queue; - } - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); @@ -2011,13 +2004,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); mutex_unlock(&nvme_rdma_ctrl_mutex); - nvme_start_ctrl(&ctrl->ctrl); - return &ctrl->ctrl; -out_remove_admin_queue: - nvme_rdma_stop_queue(&ctrl->queues[0]); - nvme_rdma_destroy_admin_queue(ctrl, true); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); |