author    | Sagi Grimberg <sagi@grimberg.me>                 | 2020-06-19 03:30:24 +0300
committer | Christoph Hellwig <hch@lst.de>                   | 2020-07-08 17:16:18 +0300
commit    | 122e5b9f3d370ae11e1502d14ff5c7ea9b144a76 (patch)
tree      | 0f0fad7575a1032d1965365c3deb825ed62c560a /drivers/nvme
parent    | 86f0348ace1510d7ac25124b096fb88a6ab45270 (diff)
nvme-tcp: optimize network stack by setting msg flags according to batch size
If we have a long list of requests to send, signal the network stack
that more is coming (MSG_MORE). If nothing else is queued, signal MSG_EOR.
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
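
To illustrate the flag decision, here is a minimal userspace sketch (not the driver code, which operates on kernel sockets via kernel_sendmsg()). It assumes "fd" is an already connected TCP socket and that "iovs"/"cnt" is a hypothetical list of buffers pending transmission; it applies the same rule the patch introduces: MSG_MORE while more buffers are queued behind the current one, MSG_EOR on the last one so the stack can push the data out.

    #include <stddef.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /*
     * Userspace sketch only: send a batch of buffers, hinting the network
     * stack about pending data the same way the patch above does.
     */
    static ssize_t send_batch(int fd, struct iovec *iovs, size_t cnt)
    {
            ssize_t total = 0;

            for (size_t i = 0; i < cnt; i++) {
                    struct msghdr msg = { .msg_iov = &iovs[i], .msg_iovlen = 1 };
                    int flags = 0;  /* the driver additionally sets MSG_DONTWAIT */
                    ssize_t ret;

                    /*
                     * Same rule as in nvme_tcp_try_send_*(): more buffers
                     * queued behind this one -> MSG_MORE, let the stack
                     * coalesce; last buffer -> MSG_EOR, push it out.
                     */
                    if (i + 1 < cnt)
                            flags |= MSG_MORE;
                    else
                            flags |= MSG_EOR;

                    ret = sendmsg(fd, &msg, flags);
                    if (ret < 0)
                            return ret;     /* partial sends not handled in this sketch */
                    total += ret;
            }
            return total;
    }

In the driver itself the "more is queued" test is the new nvme_tcp_queue_more() helper in the diff below, which also covers requests submitted while the send mutex is held via the new more_requests flag.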
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/tcp.c | 20
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2d3962c164a4..b2e73e19ef01 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -79,6 +79,7 @@ struct nvme_tcp_queue {
 	struct mutex		send_mutex;
 	struct llist_head	req_list;
 	struct list_head	send_list;
+	bool			more_requests;
 
 	/* recv state */
 	void			*pdu;
@@ -277,7 +278,9 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	 */
 	if (queue->io_cpu == smp_processor_id() && sync && empty &&
 	    mutex_trylock(&queue->send_mutex)) {
+		queue->more_requests = !last;
 		nvme_tcp_try_send(queue);
+		queue->more_requests = false;
 		mutex_unlock(&queue->send_mutex);
 	} else if (last) {
 		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
@@ -877,6 +880,12 @@ done:
 	read_unlock(&sk->sk_callback_lock);
 }
 
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+	return !list_empty(&queue->send_list) ||
+		!llist_empty(&queue->req_list) || queue->more_requests;
+}
+
 static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
 {
 	queue->request = NULL;
@@ -898,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		bool last = nvme_tcp_pdu_last_send(req, len);
 		int ret, flags = MSG_DONTWAIT;
 
-		if (last && !queue->data_digest)
+		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
 			flags |= MSG_EOR;
 		else
 			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
@@ -945,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	int flags = MSG_DONTWAIT;
 	int ret;
 
-	if (inline_data)
+	if (inline_data || nvme_tcp_queue_more(queue))
 		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	else
 		flags |= MSG_EOR;
@@ -1010,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
 	int ret;
-	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
+	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
 	struct kvec iov = {
 		.iov_base = &req->ddgst + req->offset,
 		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
 	};
 
+	if (nvme_tcp_queue_more(queue))
+		msg.msg_flags |= MSG_MORE;
+	else
+		msg.msg_flags |= MSG_EOR;
+
 	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
 	if (unlikely(ret <= 0))
 		return ret;