Diffstat (limited to 'drivers/nvme/host/tcp.c')
 drivers/nvme/host/tcp.c | 139
 1 file changed, 127 insertions(+), 12 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 840ae475074d..25e486e6e805 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -217,6 +217,19 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
return queue - queue->ctrl->queues;
}
+static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type)
+{
+ switch (type) {
+ case nvme_tcp_c2h_term:
+ case nvme_tcp_c2h_data:
+ case nvme_tcp_r2t:
+ case nvme_tcp_rsp:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
* Check if the queue is TLS encrypted
*/
@@ -440,7 +453,8 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
return NULL;
}
- list_del(&req->entry);
+ list_del_init(&req->entry);
+ init_llist_node(&req->lentry);
return req;
}
@@ -548,6 +562,8 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
req->queue = queue;
nvme_req(rq)->ctrl = &ctrl->ctrl;
nvme_req(rq)->cmd = &pdu->cmd;
+ init_llist_node(&req->lentry);
+ INIT_LIST_HEAD(&req->entry);
return 0;
}
@@ -752,6 +768,14 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return -EPROTO;
}
+ if (llist_on_list(&req->lentry) ||
+ !list_empty(&req->entry)) {
+ dev_err(queue->ctrl->ctrl.device,
+ "req %d unexpected r2t while processing request\n",
+ rq->tag);
+ return -EPROTO;
+ }
+
req->pdu_len = 0;
req->h2cdata_left = r2t_length;
req->h2cdata_offset = r2t_offset;
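
The unexpected-R2T check above only works because a request that is not queued reads as "empty" on both its lentry (llist) and entry (list) nodes; that is what the init_llist_node()/INIT_LIST_HEAD() hunks and the switch to list_del_init() elsewhere in this patch guarantee. A rough userspace sketch of the same idea, with hypothetical names and no kernel APIs:

/* Minimal userspace sketch (not kernel code): why "unlink + re-init" makes a
 * later membership test meaningful. node_init/node_unlink/node_on_list are
 * hypothetical stand-ins for INIT_LIST_HEAD/list_del_init/!list_empty. */
#include <stdbool.h>
#include <stdio.h>

struct node { struct node *next, *prev; };

static void node_init(struct node *n)    { n->next = n->prev = n; }
static bool node_on_list(struct node *n) { return n->next != n; }

static void node_add(struct node *head, struct node *n)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

/* Unlink and re-initialize, like list_del_init(): the node now reads as
 * "not queued", so a later node_on_list() check is reliable. */
static void node_unlink(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	node_init(n);
}

int main(void)
{
	struct node head, req;

	node_init(&head);
	node_init(&req);

	node_add(&head, &req);
	node_unlink(&req);

	/* With a plain unlink (no re-init) this check would still see stale
	 * pointers and report "yes"; here it correctly reports "no". */
	printf("queued after unlink: %s\n", node_on_list(&req) ? "yes" : "no");
	return 0;
}
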
@@ -763,6 +787,40 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return 0;
}
+static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue,
+ struct nvme_tcp_term_pdu *pdu)
+{
+ u16 fes;
+ const char *msg;
+ u32 plen = le32_to_cpu(pdu->hdr.plen);
+
+ static const char * const msg_table[] = {
+ [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field",
+ [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error",
+ [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error",
+ [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range",
+ [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded",
+ [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter",
+ };
+
+ if (plen < NVME_TCP_MIN_C2HTERM_PLEN ||
+ plen > NVME_TCP_MAX_C2HTERM_PLEN) {
+ dev_err(queue->ctrl->ctrl.device,
+ "Received a malformed C2HTermReq PDU (plen = %u)\n",
+ plen);
+ return;
+ }
+
+ fes = le16_to_cpu(pdu->fes);
+ if (fes && fes < ARRAY_SIZE(msg_table))
+ msg = msg_table[fes];
+ else
+ msg = "Unknown";
+
+ dev_err(queue->ctrl->ctrl.device,
+ "Received C2HTermReq (FES = %s)\n", msg);
+}
+
static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
unsigned int *offset, size_t *len)
{
@@ -784,6 +842,25 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
return 0;
hdr = queue->pdu;
+ if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) {
+ if (!nvme_tcp_recv_pdu_supported(hdr->type))
+ goto unsupported_pdu;
+
+ dev_err(queue->ctrl->ctrl.device,
+ "pdu type %d has unexpected header length (%d)\n",
+ hdr->type, hdr->hlen);
+ return -EPROTO;
+ }
+
+ if (unlikely(hdr->type == nvme_tcp_c2h_term)) {
+ /*
+ * C2HTermReq never includes Header or Data digests.
+ * Skip the checks.
+ */
+ nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu);
+ return -EINVAL;
+ }
+
if (queue->hdr_digest) {
ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
if (unlikely(ret))
@@ -807,10 +884,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
nvme_tcp_init_recv_ctx(queue);
return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
default:
- dev_err(queue->ctrl->ctrl.device,
- "unsupported pdu type (%d)\n", hdr->type);
- return -EINVAL;
+ goto unsupported_pdu;
}
+
+unsupported_pdu:
+ dev_err(queue->ctrl->ctrl.device,
+ "unsupported pdu type (%d)\n", hdr->type);
+ return -EINVAL;
}
static inline void nvme_tcp_end_request(struct request *rq, u16 status)
@@ -1280,7 +1360,7 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
queue->nr_cqe = 0;
consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
- return consumed;
+ return consumed == -EAGAIN ? 0 : consumed;
}
static void nvme_tcp_io_work(struct work_struct *w)
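
The nvme_tcp_try_recv() change folds -EAGAIN into 0 so callers treat "nothing to read right now" as zero progress rather than a failure. Roughly the same idea in a standalone userspace sketch (illustrative only, not the driver code):

/* Userspace sketch: map "would block" to zero progress on a nonblocking read
 * so the caller keeps the connection and simply retries later. */
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>

static ssize_t try_recv(int fd, void *buf, size_t len)
{
	ssize_t ret = recv(fd, buf, len, MSG_DONTWAIT);

	if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
		return 0;	/* no data yet; not an error */
	return ret;		/* bytes read, 0 on EOF, or -1 on a real error */
}

int main(void)
{
	int sv[2];
	char c;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	/* Nothing has been written: the underlying recv() fails with EAGAIN,
	 * but try_recv() reports 0 so the caller just tries again later. */
	printf("progress: %zd\n", try_recv(sv[0], &c, 1));
	return 0;
}
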
@@ -1308,6 +1388,11 @@ static void nvme_tcp_io_work(struct work_struct *w)
else if (unlikely(result < 0))
return;
+ /* did we get some space after spending time in recv? */
+ if (nvme_tcp_queue_has_pending(queue) &&
+ sk_stream_is_writeable(queue->sock->sk))
+ pending = true;
+
if (!pending || !queue->rd_enabled)
return;
@@ -1452,11 +1537,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
msg.msg_flags = MSG_WAITALL;
ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
- if (ret < sizeof(*icresp)) {
+ if (ret >= 0 && ret < sizeof(*icresp))
+ ret = -ECONNRESET;
+ if (ret < 0) {
pr_warn("queue %d: failed to receive icresp, error %d\n",
nvme_tcp_queue_id(queue), ret);
- if (ret >= 0)
- ret = -ECONNRESET;
goto free_icresp;
}
ret = -ENOTCONN;
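
The icresp fix treats a short MSG_WAITALL read as a lost connection and funnels it through the same error path as a negative return. A minimal userspace sketch of that behaviour (recv_exact() is a hypothetical helper, not a kernel API):

/* Userspace sketch: with MSG_WAITALL a short but non-negative return still
 * means the peer went away mid-message, so report it as a reset. */
#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

static int recv_exact(int fd, void *buf, size_t len)
{
	ssize_t ret = recv(fd, buf, len, MSG_WAITALL);

	if (ret < 0)
		return -errno;		/* real socket error */
	if ((size_t)ret < len)
		return -ECONNRESET;	/* peer closed mid-message: short read */
	return 0;			/* got the full message */
}

int main(void)
{
	int sv[2];
	char buf[8];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	/* Send fewer bytes than expected, then close the writing side. */
	send(sv[1], "abc", 3, 0);
	shutdown(sv[1], SHUT_WR);

	/* 3 < 8, so this reports -ECONNRESET rather than "success with 3". */
	printf("recv_exact: %d\n", recv_exact(sv[0], buf, sizeof(buf)));
	return 0;
}
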
@@ -1875,7 +1960,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
cancel_work_sync(&queue->io_work);
}
-static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
@@ -1894,6 +1979,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
mutex_unlock(&queue->queue_lock);
}
+static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+ struct nvme_tcp_queue *queue = &ctrl->queues[qid];
+ int timeout = 100;
+
+ while (timeout > 0) {
+ if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) ||
+ !sk_wmem_alloc_get(queue->sock->sk))
+ return;
+ msleep(2);
+ timeout -= 2;
+ }
+ dev_warn(nctrl->device,
+ "qid %d: timeout draining sock wmem allocation expired\n",
+ qid);
+}
+
+static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
+{
+ nvme_tcp_stop_queue_nowait(nctrl, qid);
+ nvme_tcp_wait_queue(nctrl, qid);
+}
+
+
static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
{
write_lock_bh(&queue->sock->sk->sk_callback_lock);
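
nvme_tcp_wait_queue() above is a bounded polling wait: sleep in ~2ms steps until the socket's write memory drains or a 100ms budget expires, then warn. A standalone userspace sketch of the shape of that loop (drain_done() is a hypothetical stand-in for the sk_wmem_alloc_get() test):

/* Userspace sketch of a bounded polling wait with a warning on expiry. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool drain_done(void)
{
	return false;	/* pretend data is still queued, to show the timeout */
}

static void wait_drain(int timeout_ms)
{
	struct timespec step = { .tv_sec = 0, .tv_nsec = 2 * 1000 * 1000 };

	while (timeout_ms > 0) {
		if (drain_done())
			return;
		nanosleep(&step, NULL);	/* ~2ms, like msleep(2) */
		timeout_ms -= 2;
	}
	fprintf(stderr, "timeout waiting for send buffer to drain\n");
}

int main(void)
{
	wait_drain(100);	/* same 100ms budget as the patch */
	return 0;
}
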
@@ -1961,7 +2071,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
int i;
for (i = 1; i < ctrl->queue_count; i++)
- nvme_tcp_stop_queue(ctrl, i);
+ nvme_tcp_stop_queue_nowait(ctrl, i);
+ for (i = 1; i < ctrl->queue_count; i++)
+ nvme_tcp_wait_queue(ctrl, i);
}
static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
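
Splitting nvme_tcp_stop_io_queues() into a stop-all pass followed by a wait-all pass lets the per-queue drain windows overlap, so tearing down N queues costs roughly one drain timeout instead of N. A trivial sketch of that two-phase structure (stubs are hypothetical):

/* Sketch of the two-phase teardown: signal every queue first, then wait. */
#include <stdio.h>

#define NR_QUEUES 4

static void stop_queue_nowait(int qid) { printf("stop  q%d\n", qid); }
static void wait_queue(int qid)        { printf("drain q%d\n", qid); }

static void stop_io_queues(void)
{
	int i;

	/* Phase 1: ask every queue to stop sending. */
	for (i = 1; i < NR_QUEUES; i++)
		stop_queue_nowait(i);

	/* Phase 2: wait for each queue to drain; every queue is already
	 * stopping by now, so the waits overlap instead of serializing. */
	for (i = 1; i < NR_QUEUES; i++)
		wait_queue(i);
}

int main(void)
{
	stop_io_queues();
	return 0;
}
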
@@ -2498,6 +2610,8 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.offset = 0;
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
+ init_llist_node(&ctrl->async_req.lentry);
+ INIT_LIST_HEAD(&ctrl->async_req.entry);
nvme_tcp_queue_request(&ctrl->async_req, true, true);
}
@@ -2667,6 +2781,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct nvme_tcp_queue *queue = hctx->driver_data;
struct sock *sk = queue->sock->sk;
+ int ret;
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return 0;
@@ -2674,9 +2789,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
set_bit(NVME_TCP_Q_POLLING, &queue->flags);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
- nvme_tcp_try_recv(queue);
+ ret = nvme_tcp_try_recv(queue);
clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
- return queue->nr_cqe;
+ return ret < 0 ? ret : queue->nr_cqe;
}
static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)