summaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtsock.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 20:49:23 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 20:49:23 +0400
commit25641c0c8d72f3d235c022fd2c19181912c2ae8b (patch)
treec8970effb0f33eb870777031162de5d1bfd0ec08 /net/sunrpc/xprtsock.c
parentef0625b70dac9405ac9d9928cf767108041a9e51 (diff)
parent72c23f0819977d37924af92a42a9b7fbfd1c95d8 (diff)
downloadlinux-25641c0c8d72f3d235c022fd2c19181912c2ae8b.tar.xz
Merge tag 'nfs-for-3.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable fixes: - fix an NFSv4.1 state renewal regression - fix open/lock state recovery error handling - fix lock recovery when CREATE_SESSION/SETCLIENTID_CONFIRM fails - fix statd when reconnection fails - don't wake tasks during connection abort - don't start reboot recovery if lease check fails - fix duplicate proc entries Features: - pNFS block driver fixes and clean ups from Christoph - More code cleanups from Anna - Improve mmap() writeback performance - Replace use of PF_TRANS with a more generic mechanism for avoiding deadlocks in nfs_release_page" * tag 'nfs-for-3.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (66 commits) NFSv4.1: Fix an NFSv4.1 state renewal regression NFSv4: fix open/lock state recovery error handling NFSv4: Fix lock recovery when CREATE_SESSION/SETCLIENTID_CONFIRM fails NFS: Fabricate fscache server index key correctly SUNRPC: Add missing support for RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT NFSv3: Fix missing includes of nfs3_fs.h NFS/SUNRPC: Remove other deadlock-avoidance mechanisms in nfs_release_page() NFS: avoid waiting at all in nfs_release_page when congested. NFS: avoid deadlocks with loop-back mounted NFS filesystems. MM: export page_wakeup functions SCHED: add some "wait..on_bit...timeout()" interfaces. NFS: don't use STABLE writes during writeback. NFSv4: use exponential retry on NFS4ERR_DELAY for async requests. rpc: Add -EPERM processing for xs_udp_send_request() rpc: return sent and err from xs_sendpages() lockd: Try to reconnect if statd has moved SUNRPC: Don't wake tasks during connection abort Fixing lease renewal nfs: fix duplicate proc entries pnfs/blocklayout: Fix a 64-bit division/remainder issue in bl_map_stripe ...
Diffstat (limited to 'net/sunrpc/xprtsock.c')
-rw-r--r--net/sunrpc/xprtsock.c121
1 files changed, 70 insertions, 51 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 43cd89eacfab..3b305ab17afe 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -399,13 +399,13 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
{
ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
struct page **ppage;
unsigned int remainder;
- int err, sent = 0;
+ int err;
remainder = xdr->page_len - base;
base += xdr->page_base;
@@ -424,15 +424,15 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
- sent += err;
+ *sent_p += err;
ppage++;
base = 0;
}
- if (sent == 0)
- return err;
- if (err > 0)
- sent += err;
- return sent;
+ if (err > 0) {
+ *sent_p += err;
+ err = 0;
+ }
+ return err;
}
/**
@@ -443,12 +443,14 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @xdr: buffer containing this request
* @base: starting position in the buffer
* @zerocopy: true if it is safe to use sendpage()
+ * @sent_p: return the total number of bytes successfully queued for sending
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
{
unsigned int remainder = xdr->len - base;
- int err, sent = 0;
+ int err = 0;
+ int sent = 0;
if (unlikely(!sock))
return -ENOTSOCK;
@@ -465,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
if (remainder == 0 || err != len)
goto out;
- sent += err;
+ *sent_p += err;
base = 0;
} else
base -= xdr->head[0].iov_len;
@@ -473,23 +475,23 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
- if (remainder == 0 || err != len)
+ err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
+ *sent_p += sent;
+ if (remainder == 0 || sent != len)
goto out;
- sent += err;
base = 0;
} else
base -= xdr->page_len;
if (base >= xdr->tail[0].iov_len)
- return sent;
+ return 0;
err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
out:
- if (sent == 0)
- return err;
- if (err > 0)
- sent += err;
- return sent;
+ if (err > 0) {
+ *sent_p += err;
+ err = 0;
+ }
+ return err;
}
static void xs_nospace_callback(struct rpc_task *task)
@@ -573,19 +575,20 @@ static int xs_local_send_request(struct rpc_task *task)
container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
+ int sent = 0;
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
- status = xs_sendpages(transport->sock, NULL, 0,
- xdr, req->rq_bytes_sent, true);
+ status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+ true, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - req->rq_bytes_sent, status);
- if (likely(status >= 0)) {
- req->rq_bytes_sent += status;
- req->rq_xmit_bytes_sent += status;
+ if (likely(sent > 0) || status == 0) {
+ req->rq_bytes_sent += sent;
+ req->rq_xmit_bytes_sent += sent;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
@@ -626,6 +629,7 @@ static int xs_udp_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ int sent = 0;
int status;
xs_pktdump("packet data:",
@@ -634,22 +638,25 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
- status = xs_sendpages(transport->sock,
- xs_addr(xprt),
- xprt->addrlen, xdr,
- req->rq_bytes_sent, true);
+ status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
+ xdr, req->rq_bytes_sent, true, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (status >= 0) {
- req->rq_xmit_bytes_sent += status;
- if (status >= req->rq_slen)
+ /* firewall is blocking us, don't return -EAGAIN or we end up looping */
+ if (status == -EPERM)
+ goto process_status;
+
+ if (sent > 0 || status == 0) {
+ req->rq_xmit_bytes_sent += sent;
+ if (sent >= req->rq_slen)
return 0;
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
+process_status:
switch (status) {
case -ENOTSOCK:
status = -ENOTCONN;
@@ -665,6 +672,7 @@ static int xs_udp_send_request(struct rpc_task *task)
case -ENOBUFS:
case -EPIPE:
case -ECONNREFUSED:
+ case -EPERM:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
@@ -713,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct xdr_buf *xdr = &req->rq_snd_buf;
bool zerocopy = true;
int status;
+ int sent;
xs_encode_stream_record_marker(&req->rq_snd_buf);
@@ -730,26 +739,26 @@ static int xs_tcp_send_request(struct rpc_task *task)
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
while (1) {
- status = xs_sendpages(transport->sock,
- NULL, 0, xdr, req->rq_bytes_sent,
- zerocopy);
+ sent = 0;
+ status = xs_sendpages(transport->sock, NULL, 0, xdr,
+ req->rq_bytes_sent, zerocopy, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (unlikely(status < 0))
+ if (unlikely(sent == 0 && status < 0))
break;
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
- req->rq_bytes_sent += status;
- req->rq_xmit_bytes_sent += status;
+ req->rq_bytes_sent += sent;
+ req->rq_xmit_bytes_sent += sent;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
}
- if (status != 0)
+ if (sent != 0)
continue;
status = -EAGAIN;
break;
@@ -845,6 +854,8 @@ static void xs_error_report(struct sock *sk)
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
+ if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state))
+ goto out;
xprt_wake_pending_tasks(xprt, err);
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1746,13 +1757,29 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
unsigned short port = xs_get_srcport(transport);
unsigned short last;
+ /*
+ * If we are asking for any ephemeral port (i.e. port == 0 &&
+ * transport->xprt.resvport == 0), don't bind. Let the local
+ * port selection happen implicitly when the socket is used
+ * (for example at connect time).
+ *
+ * This ensures that we can continue to establish TCP
+ * connections even when all local ephemeral ports are already
+ * a part of some TCP connection. This makes no difference
+ * for UDP sockets, but also doens't harm them.
+ *
+ * If we're asking for any reserved port (i.e. port == 0 &&
+ * transport->xprt.resvport == 1) xs_get_srcport above will
+ * ensure that port is non-zero and we will bind as needed.
+ */
+ if (port == 0)
+ return 0;
+
memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
rpc_set_port((struct sockaddr *)&myaddr, port);
err = kernel_bind(sock, (struct sockaddr *)&myaddr,
transport->xprt.addrlen);
- if (port == 0)
- break;
if (err == 0) {
transport->srcport = port;
break;
@@ -1927,8 +1954,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
struct socket *sock;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
status = __sock_create(xprt->xprt_net, AF_LOCAL,
SOCK_STREAM, 0, &sock, 1);
@@ -1968,7 +1993,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
return status;
}
@@ -2071,8 +2095,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
/* Start by resetting any existing state */
xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
@@ -2092,7 +2114,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
}
/*
@@ -2229,8 +2250,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- current->flags |= PF_FSTRANS;
-
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
@@ -2245,7 +2264,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
&xprt->state);
/* "close" the socket, preserving the local port */
+ set_bit(XPRT_CONNECTION_REUSE, &xprt->state);
xs_tcp_reuse_connection(transport);
+ clear_bit(XPRT_CONNECTION_REUSE, &xprt->state);
if (abort_and_exit)
goto out_eagain;
@@ -2276,7 +2297,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EINPROGRESS:
case -EALREADY:
xprt_clear_connecting(xprt);
- current->flags &= ~PF_FSTRANS;
return;
case -EINVAL:
/* Happens, for instance, if the user specified a link
@@ -2294,7 +2314,6 @@ out_eagain:
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
- current->flags &= ~PF_FSTRANS;
}
/**