diff options
Diffstat (limited to 'drivers/nvme/host/tcp.c')
-rw-r--r-- | drivers/nvme/host/tcp.c | 120 |
1 files changed, 96 insertions, 24 deletions
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 49d4373b84eb..0ef14f0fad86 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -20,6 +20,16 @@ struct nvme_tcp_queue; +/* Define the socket priority to use for connections were it is desirable + * that the NIC consider performing optimized packet processing or filtering. + * A non-zero value being sufficient to indicate general consideration of any + * possible optimization. Making it a module param allows for alternative + * values that may be unique for some NIC implementations. + */ +static int so_priority; +module_param(so_priority, int, 0644); +MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); + enum nvme_tcp_send_state { NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_H2C_PDU, @@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) if (req->state == NVME_TCP_SEND_DDGST) ret = nvme_tcp_try_send_ddgst(req); done: - if (ret == -EAGAIN) + if (ret == -EAGAIN) { ret = 0; + } else if (ret < 0) { + dev_err(queue->ctrl->ctrl.device, + "failed to send request %d\n", ret); + if (ret != -EPIPE && ret != -ECONNRESET) + nvme_tcp_fail_request(queue->request); + nvme_tcp_done_send_req(queue); + } return ret; } @@ -1049,25 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w) int result; result = nvme_tcp_try_send(queue); - if (result > 0) { + if (result > 0) pending = true; - } else if (unlikely(result < 0)) { - dev_err(queue->ctrl->ctrl.device, - "failed to send request %d\n", result); - - /* - * Fail the request unless peer closed the connection, - * in which case error recovery flow will complete all. - */ - if ((result != -EPIPE) && (result != -ECONNRESET)) - nvme_tcp_fail_request(queue->request); - nvme_tcp_done_send_req(queue); - return; - } + else if (unlikely(result < 0)) + break; result = nvme_tcp_try_recv(queue); if (result > 0) pending = true; + else if (unlikely(result < 0)) + break; if (!pending) return; @@ -1248,13 +1256,67 @@ free_icreq: return ret; } +static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) +{ + return nvme_tcp_queue_id(queue) == 0; +} + +static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; +} + +static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ]; +} + +static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + + return !nvme_tcp_admin_queue(queue) && + !nvme_tcp_default_queue(queue) && + !nvme_tcp_read_queue(queue) && + qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + + ctrl->io_queues[HCTX_TYPE_READ] + + ctrl->io_queues[HCTX_TYPE_POLL]; +} + +static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) +{ + struct nvme_tcp_ctrl *ctrl = queue->ctrl; + int qid = nvme_tcp_queue_id(queue); + int n = 0; + + if (nvme_tcp_default_queue(queue)) + n = qid - 1; + else if (nvme_tcp_read_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; + else if (nvme_tcp_poll_queue(queue)) + n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - + ctrl->io_queues[HCTX_TYPE_READ] - 1; + queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); +} + static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, size_t queue_size) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct linger sol = { .l_onoff = 1, .l_linger = 0 }; - int ret, opt, rcv_pdu_size, n; + int ret, opt, rcv_pdu_size; queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); @@ -1309,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, goto err_sock; } + if (so_priority > 0) { + ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY, + (char *)&so_priority, sizeof(so_priority)); + if (ret) { + dev_err(ctrl->ctrl.device, + "failed to set SO_PRIORITY sock opt, ret %d\n", + ret); + goto err_sock; + } + } + /* Set socket type of service */ if (nctrl->opts->tos >= 0) { opt = nctrl->opts->tos; @@ -1322,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, } queue->sock->sk->sk_allocation = GFP_ATOMIC; - if (!qid) - n = 0; - else - n = (qid - 1) % num_online_cpus(); - queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); + nvme_tcp_set_queue_io_cpu(queue); queue->request = NULL; queue->data_remaining = 0; queue->ddgst_remaining = 0; @@ -1861,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) } if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { - /* state change failure is ok if we're in DELETING state */ + /* + * state change failure is ok if we're in DELETING state, + * unless we're during creation of a new controller to + * avoid races with teardown flow. + */ WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); + WARN_ON_ONCE(new); ret = -EINVAL; goto destroy_io; } @@ -2359,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", ctrl->ctrl.opts->subsysnqn, &ctrl->addr); - nvme_get_ctrl(&ctrl->ctrl); - mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); mutex_unlock(&nvme_tcp_ctrl_mutex); |