summaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtsock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/xprtsock.c')
-rw-r--r--net/sunrpc/xprtsock.c281
1 files changed, 150 insertions, 131 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae09d850cd11..53de72d2dded 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -50,6 +50,7 @@
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
+#include <linux/sched/mm.h>
#include <trace/events/sunrpc.h>
@@ -404,8 +405,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
size_t want, seek_init = seek, offset = 0;
ssize_t ret;
- if (seek < buf->head[0].iov_len) {
- want = min_t(size_t, count, buf->head[0].iov_len);
+ want = min_t(size_t, count, buf->head[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -416,8 +417,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
goto out;
seek = 0;
} else {
- seek -= buf->head[0].iov_len;
- offset += buf->head[0].iov_len;
+ seek -= want;
+ offset += want;
}
want = xs_alloc_sparse_pages(buf,
@@ -442,8 +443,8 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
offset += want;
}
- if (seek < buf->tail[0].iov_len) {
- want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ if (seek < want) {
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
if (ret <= 0)
goto sock_err;
@@ -453,7 +454,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (ret != want)
goto out;
} else
- offset += buf->tail[0].iov_len;
+ offset = seek_init;
ret = -EMSGSIZE;
out:
*read = offset - seek_init;
@@ -481,6 +482,14 @@ xs_read_stream_request_done(struct sock_xprt *transport)
return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
}
+static void
+xs_read_stream_check_eor(struct sock_xprt *transport,
+ struct msghdr *msg)
+{
+ if (xs_read_stream_request_done(transport))
+ msg->msg_flags |= MSG_EOR;
+}
+
static ssize_t
xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
int flags, struct rpc_rqst *req)
@@ -492,17 +501,21 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
xs_read_header(transport, buf);
want = transport->recv.len - transport->recv.offset;
- ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
- transport->recv.copied + want, transport->recv.copied,
- &read);
- transport->recv.offset += read;
- transport->recv.copied += read;
- if (transport->recv.offset == transport->recv.len) {
- if (xs_read_stream_request_done(transport))
- msg->msg_flags |= MSG_EOR;
- return read;
+ if (want != 0) {
+ ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+ transport->recv.copied + want,
+ transport->recv.copied,
+ &read);
+ transport->recv.offset += read;
+ transport->recv.copied += read;
}
+ if (transport->recv.offset == transport->recv.len)
+ xs_read_stream_check_eor(transport, msg);
+
+ if (want == 0)
+ return 0;
+
switch (ret) {
default:
break;
@@ -655,13 +668,34 @@ out_err:
return ret != 0 ? ret : -ESHUTDOWN;
}
+static __poll_t xs_poll_socket(struct sock_xprt *transport)
+{
+ return transport->sock->ops->poll(NULL, transport->sock, NULL);
+}
+
+static bool xs_poll_socket_readable(struct sock_xprt *transport)
+{
+ __poll_t events = xs_poll_socket(transport);
+
+ return (events & (EPOLLIN | EPOLLRDNORM)) && !(events & EPOLLRDHUP);
+}
+
+static void xs_poll_check_readable(struct sock_xprt *transport)
+{
+
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (!xs_poll_socket_readable(transport))
+ return;
+ if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ queue_work(xprtiod_workqueue, &transport->recv_worker);
+}
+
static void xs_stream_data_receive(struct sock_xprt *transport)
{
size_t read = 0;
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
for (;;) {
@@ -671,6 +705,10 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
read += ret;
cond_resched();
}
+ if (ret == -ESHUTDOWN)
+ kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_stream_read_data(&transport->xprt, ret, read);
@@ -680,7 +718,10 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_stream_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
static void
@@ -690,99 +731,65 @@ xs_stream_reset_connect(struct sock_xprt *transport)
transport->recv.len = 0;
transport->recv.copied = 0;
transport->xmit.offset = 0;
+}
+
+static void
+xs_stream_start_connect(struct sock_xprt *transport)
+{
transport->xprt.stat.connect_count++;
transport->xprt.stat.connect_start = jiffies;
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
+static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
+{
+ if (seek)
+ iov_iter_advance(&msg->msg_iter, seek);
+ return sock_sendmsg(sock, msg);
+}
+
+static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
+{
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+ return xs_sendmsg(sock, msg, seek);
+}
+
+static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
+{
+ int err;
+
+ err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+ xdr_buf_pagecount(xdr),
+ xdr->page_len + xdr->page_base);
+ return xs_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+#define xs_record_marker_len() sizeof(rpc_fraghdr)
+
/* Common case:
* - stream transport
* - sending from byte 0 of the message
* - the message is wholly contained in @xdr's head iovec
*/
-static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr,
- unsigned int remainder)
+static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+ rpc_fraghdr marker, struct kvec *vec, size_t base)
{
- struct msghdr msg = {
- .msg_flags = XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0)
- };
- rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
- (u32)xdr->len);
struct kvec iov[2] = {
- {
+ [0] = {
.iov_base = &marker,
.iov_len = sizeof(marker)
},
- {
- .iov_base = xdr->head[0].iov_base,
- .iov_len = xdr->head[0].iov_len
- },
- };
- int ret;
-
- ret = kernel_sendmsg(sock, &msg, iov, 2,
- iov[0].iov_len + iov[1].iov_len);
- if (ret < 0)
- return ret;
- if (ret < iov[0].iov_len)
- return -EPIPE;
- return ret - iov[0].iov_len;
-}
-
-static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
-{
- struct msghdr msg = {
- .msg_name = addr,
- .msg_namelen = addrlen,
- .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
- };
- struct kvec iov = {
- .iov_base = vec->iov_base + base,
- .iov_len = vec->iov_len - base,
+ [1] = *vec,
};
+ size_t len = iov[0].iov_len + iov[1].iov_len;
- if (iov.iov_len != 0)
- return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
- return kernel_sendmsg(sock, &msg, NULL, 0, 0);
-}
-
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
-{
- ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
- int offset, size_t size, int flags);
- struct page **ppage;
- unsigned int remainder;
- int err;
-
- remainder = xdr->page_len - base;
- base += xdr->page_base;
- ppage = xdr->pages + (base >> PAGE_SHIFT);
- base &= ~PAGE_MASK;
- do_sendpage = sock->ops->sendpage;
- if (!zerocopy)
- do_sendpage = sock_no_sendpage;
- for(;;) {
- unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
- int flags = XS_SENDMSG_FLAGS;
-
- remainder -= len;
- if (more)
- flags |= MSG_MORE;
- if (remainder != 0)
- flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE;
- err = do_sendpage(sock, *ppage, base, len, flags);
- if (remainder == 0 || err != len)
- break;
- *sent_p += err;
- ppage++;
- base = 0;
- }
- if (err > 0) {
- *sent_p += err;
- err = 0;
- }
- return err;
+ iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+ return xs_sendmsg(sock, msg, base);
}
/**
@@ -792,53 +799,60 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
- * @zerocopy: true if it is safe to use sendpage()
+ * @rm: stream record marker field
* @sent_p: return the total number of bytes successfully queued for sending
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
{
- unsigned int remainder = xdr->len - base;
+ struct msghdr msg = {
+ .msg_name = addr,
+ .msg_namelen = addrlen,
+ .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
+ };
+ unsigned int rmsize = rm ? sizeof(rm) : 0;
+ unsigned int remainder = rmsize + xdr->len - base;
+ unsigned int want;
int err = 0;
- int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
- if (base != 0) {
- addr = NULL;
- addrlen = 0;
- }
-
- if (base < xdr->head[0].iov_len || addr != NULL) {
- unsigned int len = xdr->head[0].iov_len - base;
+ want = xdr->head[0].iov_len + rmsize;
+ if (base < want) {
+ unsigned int len = want - base;
remainder -= len;
- if (!base && !addr)
- err = xs_send_rm_and_kvec(sock, xdr, remainder);
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ if (rmsize)
+ err = xs_send_rm_and_kvec(sock, &msg, rm,
+ &xdr->head[0], base);
else
- err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0],
- base, remainder != 0);
+ err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
base = 0;
} else
- base -= xdr->head[0].iov_len;
+ base -= want;
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
- *sent_p += sent;
- if (remainder == 0 || sent != len)
+ if (remainder == 0)
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_pagedata(sock, &msg, xdr, base);
+ if (remainder == 0 || err != len)
goto out;
+ *sent_p += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
return 0;
- err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
+ msg.msg_flags &= ~MSG_MORE;
+ err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
out:
if (err > 0) {
*sent_p += err;
@@ -907,6 +921,17 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
}
+/*
+ * Return the stream record marker field for a record of length < 2^31-1
+ */
+static rpc_fraghdr
+xs_stream_record_marker(struct xdr_buf *xdr)
+{
+ if (!xdr->len)
+ return 0;
+ return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
+}
+
/**
* xs_local_send_request - write an RPC request to an AF_LOCAL socket
* @req: pointer to RPC request
@@ -939,7 +964,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- true, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
@@ -951,7 +977,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -1007,7 +1032,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
- xdr, 0, true, &sent);
+ xdr, 0, 0, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len, status);
@@ -1071,7 +1096,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
- bool zerocopy = true;
bool vm_wait = false;
int status;
int sent;
@@ -1086,12 +1110,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
- /* Don't use zero copy if this is a resend. If the RPC call
- * completes while the socket holds a reference to the pages,
- * then we may end up resending corrupted data.
- */
- if (req->rq_task->tk_flags & RPC_TASK_SENT)
- zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
xs_tcp_set_socket_timeouts(xprt, transport->sock);
@@ -1104,7 +1122,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
transport->xmit.offset,
- zerocopy, &sent);
+ xs_stream_record_marker(xdr),
+ &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
@@ -1115,7 +1134,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
- req->rq_bytes_sent = 0;
transport->xmit.offset = 0;
return 0;
}
@@ -1255,6 +1273,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
xprt_clear_connected(xprt);
write_unlock_bh(&sk->sk_callback_lock);
xs_sock_reset_connection_flags(xprt);
+ /* Reset stream record info */
+ xs_stream_reset_connect(transport);
mutex_unlock(&transport->recv_mutex);
trace_rpc_socket_close(xprt, sock);
@@ -1382,7 +1402,6 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
@@ -1394,6 +1413,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
consume_skb(skb);
cond_resched();
}
+ xs_poll_check_readable(transport);
out:
mutex_unlock(&transport->recv_mutex);
}
@@ -1402,7 +1422,10 @@ static void xs_udp_data_receive_workfn(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, recv_worker);
+ unsigned int pflags = memalloc_nofs_save();
+
xs_udp_data_receive(transport);
+ memalloc_nofs_restore(pflags);
}
/**
@@ -1893,7 +1916,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
xprt_clear_connected(xprt);
@@ -1904,7 +1926,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
write_unlock_bh(&sk->sk_callback_lock);
}
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
@@ -2081,7 +2103,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
- sk->sk_allocation = GFP_NOIO;
xprt_set_connected(xprt);
@@ -2244,7 +2265,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_write_space = xs_tcp_write_space;
sock_set_flag(sk, SOCK_FASYNC);
sk->sk_error_report = xs_error_report;
- sk->sk_allocation = GFP_NOIO;
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
@@ -2264,8 +2284,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
- /* Reset TCP record info */
- xs_stream_reset_connect(transport);
+ xs_stream_start_connect(transport);
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);