summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c551
-rw-r--r--net/9p/mod.c9
-rw-r--r--net/9p/protocol.c20
-rw-r--r--net/9p/trans_fd.c64
-rw-r--r--net/9p/trans_rdma.c37
-rw-r--r--net/9p/trans_virtio.c44
-rw-r--r--net/9p/trans_xen.c17
-rw-r--r--net/9p/util.c140
-rw-r--r--net/bluetooth/bnep/sock.c19
-rw-r--r--net/bluetooth/cmtp/sock.c19
-rw-r--r--net/bluetooth/hidp/core.c10
-rw-r--r--net/bluetooth/hidp/hidp.h2
-rw-r--r--net/bluetooth/hidp/sock.c79
-rw-r--r--net/bridge/br_multicast.c9
-rw-r--r--net/ceph/crypto.c12
-rw-r--r--net/ceph/crypto.h2
-rw-r--r--net/ceph/messenger.c107
-rw-r--r--net/ceph/msgpool.c27
-rw-r--r--net/ceph/osd_client.c363
-rw-r--r--net/ceph/pagelist.c20
-rw-r--r--net/compat.c10
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/neighbour.c4
-rw-r--r--net/core/netclassid_cgroup.c1
-rw-r--r--net/core/netpoll.c3
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c1
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/igmp.c53
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_bpf.c1
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/udp_diag.c1
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/af_inet6.c5
-rw-r--r--net/ipv6/anycast.c80
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/mac802154/llsec.c16
-rw-r--r--net/mac802154/llsec.h2
-rw-r--r--net/openvswitch/conntrack.c3
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/rxrpc/ar-internal.h3
-rw-r--r--net/rxrpc/call_event.c18
-rw-r--r--net/rxrpc/output.c35
-rw-r--r--net/rxrpc/rxkad.c44
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sctp/associola.c10
-rw-r--r--net/sctp/outqueue.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c8
-rw-r--r--net/smc/smc_core.c25
-rw-r--r--net/socket.c18
-rw-r--r--net/sunrpc/auth.c310
-rw-r--r--net/sunrpc/auth_generic.c2
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c45
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c87
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c53
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c38
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c18
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c28
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c28
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c41
-rw-r--r--net/sunrpc/auth_null.c6
-rw-r--r--net/sunrpc/auth_unix.c4
-rw-r--r--net/sunrpc/backchannel_rqst.c1
-rw-r--r--net/sunrpc/cache.c153
-rw-r--r--net/sunrpc/clnt.c174
-rw-r--r--net/sunrpc/sched.c178
-rw-r--r--net/sunrpc/socklib.c10
-rw-r--r--net/sunrpc/svc_xprt.c4
-rw-r--r--net/sunrpc/svcauth.c74
-rw-r--r--net/sunrpc/svcauth_unix.c24
-rw-r--r--net/sunrpc/svcsock.c59
-rw-r--r--net/sunrpc/xdr.c34
-rw-r--r--net/sunrpc/xprt.c908
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c20
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c131
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c137
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c30
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c38
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c10
-rw-r--r--net/sunrpc/xprtrdma/transport.c120
-rw-r--r--net/sunrpc/xprtrdma/verbs.c178
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h18
-rw-r--r--net/sunrpc/xprtsock.c1107
-rw-r--r--net/xfrm/Kconfig1
-rw-r--r--net/xfrm/xfrm_hash.c2
98 files changed, 3391 insertions, 2681 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile
index c0486cfc85d9..aa0a5641e5d0 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
mod.o \
client.o \
error.o \
- util.o \
protocol.o \
trans_fd.o \
trans_common.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index deae53a7dffc..5f23e18eecc0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,144 +231,170 @@ free_and_return:
return ret;
}
-static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+ int alloc_msize)
{
- struct p9_fcall *fc;
- fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
- if (!fc)
- return NULL;
+ if (likely(c->fcall_cache) && alloc_msize == c->msize) {
+ fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+ fc->cache = c->fcall_cache;
+ } else {
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ fc->cache = NULL;
+ }
+ if (!fc->sdata)
+ return -ENOMEM;
fc->capacity = alloc_msize;
- fc->sdata = (char *) fc + sizeof(struct p9_fcall);
- return fc;
+ return 0;
+}
+
+void p9_fcall_fini(struct p9_fcall *fc)
+{
+ /* sdata can be NULL for interrupted requests in trans_rdma,
+ * and kmem_cache_free does not do NULL-check for us
+ */
+ if (unlikely(!fc->sdata))
+ return;
+
+ if (fc->cache)
+ kmem_cache_free(fc->cache, fc->sdata);
+ else
+ kfree(fc->sdata);
}
+EXPORT_SYMBOL(p9_fcall_fini);
+
+static struct kmem_cache *p9_req_cache;
/**
- * p9_tag_alloc - lookup/allocate a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
- *
- * this is a simple array lookup, but will grow the
- * request_slots as necessary to accommodate transaction
- * ids which did not previously have a slot.
- *
- * this code relies on the client spinlock to manage locks, its
- * possible we should switch to something else, but I'd rather
- * stick with something low-overhead for the common case.
+ * p9_req_alloc - Allocate a new request.
+ * @c: Client session.
+ * @type: Transaction type.
+ * @max_size: Maximum packet size for this request.
*
+ * Context: Process context.
+ * Return: Pointer to new request.
*/
-
static struct p9_req_t *
-p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
+p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
{
- unsigned long flags;
- int row, col;
- struct p9_req_t *req;
+ struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
int alloc_msize = min(c->msize, max_size);
+ int tag;
- /* This looks up the original request by tag so we know which
- * buffer to read the data into */
- tag++;
-
- if (tag >= c->max_tag) {
- spin_lock_irqsave(&c->lock, flags);
- /* check again since original check was outside of lock */
- while (tag >= c->max_tag) {
- row = (tag / P9_ROW_MAXTAG);
- c->reqs[row] = kcalloc(P9_ROW_MAXTAG,
- sizeof(struct p9_req_t), GFP_ATOMIC);
-
- if (!c->reqs[row]) {
- pr_err("Couldn't grow tag array\n");
- spin_unlock_irqrestore(&c->lock, flags);
- return ERR_PTR(-ENOMEM);
- }
- for (col = 0; col < P9_ROW_MAXTAG; col++) {
- req = &c->reqs[row][col];
- req->status = REQ_STATUS_IDLE;
- init_waitqueue_head(&req->wq);
- }
- c->max_tag += P9_ROW_MAXTAG;
- }
- spin_unlock_irqrestore(&c->lock, flags);
- }
- row = tag / P9_ROW_MAXTAG;
- col = tag % P9_ROW_MAXTAG;
-
- req = &c->reqs[row][col];
- if (!req->tc)
- req->tc = p9_fcall_alloc(alloc_msize);
- if (!req->rc)
- req->rc = p9_fcall_alloc(alloc_msize);
- if (!req->tc || !req->rc)
- goto grow_failed;
+ if (!req)
+ return ERR_PTR(-ENOMEM);
- p9pdu_reset(req->tc);
- p9pdu_reset(req->rc);
+ if (p9_fcall_init(c, &req->tc, alloc_msize))
+ goto free_req;
+ if (p9_fcall_init(c, &req->rc, alloc_msize))
+ goto free;
- req->tc->tag = tag-1;
+ p9pdu_reset(&req->tc);
+ p9pdu_reset(&req->rc);
req->status = REQ_STATUS_ALLOC;
+ init_waitqueue_head(&req->wq);
+ INIT_LIST_HEAD(&req->req_list);
+
+ idr_preload(GFP_NOFS);
+ spin_lock_irq(&c->lock);
+ if (type == P9_TVERSION)
+ tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1,
+ GFP_NOWAIT);
+ else
+ tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
+ req->tc.tag = tag;
+ spin_unlock_irq(&c->lock);
+ idr_preload_end();
+ if (tag < 0)
+ goto free;
+
+ /* Init ref to two because in the general case there is one ref
+ * that is put asynchronously by a writer thread, one ref
+ * temporarily given by p9_tag_lookup and put by p9_client_cb
+ * in the recv thread, and one ref put by p9_tag_remove in the
+ * main thread. The only exception is virtio that does not use
+ * p9_tag_lookup but does not have a writer thread either
+ * (the write happens synchronously in the request/zc_request
+ * callback), so p9_client_cb eats the second ref there
+ * as the pointer is duplicated directly by virtqueue_add_sgs()
+ */
+ refcount_set(&req->refcount.refcount, 2);
return req;
-grow_failed:
- pr_err("Couldn't grow tag array\n");
- kfree(req->tc);
- kfree(req->rc);
- req->tc = req->rc = NULL;
+free:
+ p9_fcall_fini(&req->tc);
+ p9_fcall_fini(&req->rc);
+free_req:
+ kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
/**
- * p9_tag_lookup - lookup a request by tag
- * @c: client session to lookup tag within
- * @tag: numeric id for transaction
+ * p9_tag_lookup - Look up a request by tag.
+ * @c: Client session.
+ * @tag: Transaction ID.
*
+ * Context: Any context.
+ * Return: A request, or %NULL if there is no request with that tag.
*/
-
struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
{
- int row, col;
-
- /* This looks up the original request by tag so we know which
- * buffer to read the data into */
- tag++;
-
- if (tag >= c->max_tag)
- return NULL;
+ struct p9_req_t *req;
- row = tag / P9_ROW_MAXTAG;
- col = tag % P9_ROW_MAXTAG;
+ rcu_read_lock();
+again:
+ req = idr_find(&c->reqs, tag);
+ if (req) {
+ /* We have to be careful with the req found under rcu_read_lock
+ * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+ * ref again without corrupting other data, then check again
+ * that the tag matches once we have the ref
+ */
+ if (!p9_req_try_get(req))
+ goto again;
+ if (req->tc.tag != tag) {
+ p9_req_put(req);
+ goto again;
+ }
+ }
+ rcu_read_unlock();
- return &c->reqs[row][col];
+ return req;
}
EXPORT_SYMBOL(p9_tag_lookup);
/**
- * p9_tag_init - setup tags structure and contents
- * @c: v9fs client struct
- *
- * This initializes the tags structure for each client instance.
+ * p9_tag_remove - Remove a tag.
+ * @c: Client session.
+ * @r: Request of reference.
*
+ * Context: Any context.
*/
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+{
+ unsigned long flags;
+ u16 tag = r->tc.tag;
+
+ p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
+ spin_lock_irqsave(&c->lock, flags);
+ idr_remove(&c->reqs, tag);
+ spin_unlock_irqrestore(&c->lock, flags);
+ return p9_req_put(r);
+}
-static int p9_tag_init(struct p9_client *c)
+static void p9_req_free(struct kref *ref)
{
- int err = 0;
+ struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
+ p9_fcall_fini(&r->tc);
+ p9_fcall_fini(&r->rc);
+ kmem_cache_free(p9_req_cache, r);
+}
- c->tagpool = p9_idpool_create();
- if (IS_ERR(c->tagpool)) {
- err = PTR_ERR(c->tagpool);
- goto error;
- }
- err = p9_idpool_get(c->tagpool); /* reserve tag 0 */
- if (err < 0) {
- p9_idpool_destroy(c->tagpool);
- goto error;
- }
- c->max_tag = 0;
-error:
- return err;
+int p9_req_put(struct p9_req_t *r)
+{
+ return kref_put(&r->refcount, p9_req_free);
}
+EXPORT_SYMBOL(p9_req_put);
/**
* p9_tag_cleanup - cleans up tags structure and reclaims resources
@@ -379,52 +405,17 @@ error:
*/
static void p9_tag_cleanup(struct p9_client *c)
{
- int row, col;
-
- /* check to insure all requests are idle */
- for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
- for (col = 0; col < P9_ROW_MAXTAG; col++) {
- if (c->reqs[row][col].status != REQ_STATUS_IDLE) {
- p9_debug(P9_DEBUG_MUX,
- "Attempting to cleanup non-free tag %d,%d\n",
- row, col);
- /* TODO: delay execution of cleanup */
- return;
- }
- }
- }
-
- if (c->tagpool) {
- p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */
- p9_idpool_destroy(c->tagpool);
- }
+ struct p9_req_t *req;
+ int id;
- /* free requests associated with tags */
- for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
- for (col = 0; col < P9_ROW_MAXTAG; col++) {
- kfree(c->reqs[row][col].tc);
- kfree(c->reqs[row][col].rc);
- }
- kfree(c->reqs[row]);
+ rcu_read_lock();
+ idr_for_each_entry(&c->reqs, req, id) {
+ pr_info("Tag %d still in use\n", id);
+ if (p9_tag_remove(c, req) == 0)
+ pr_warn("Packet with tag %d has still references",
+ req->tc.tag);
}
- c->max_tag = 0;
-}
-
-/**
- * p9_free_req - free a request and clean-up as necessary
- * c: client state
- * r: request to release
- *
- */
-
-static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
-{
- int tag = r->tc->tag;
- p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
-
- r->status = REQ_STATUS_IDLE;
- if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
- p9_idpool_put(tag, c->tagpool);
+ rcu_read_unlock();
}
/**
@@ -435,7 +426,7 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
*/
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
- p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
/*
* This barrier is needed to make sure any change made to req before
@@ -445,7 +436,8 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
req->status = status;
wake_up(&req->wq);
- p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+ p9_req_put(req);
}
EXPORT_SYMBOL(p9_client_cb);
@@ -516,18 +508,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
int err;
int ecode;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
- if (req->rc->size >= c->msize) {
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+ if (req->rc.size >= c->msize) {
p9_debug(P9_DEBUG_ERROR,
"requested packet size too big: %d\n",
- req->rc->size);
+ req->rc.size);
return -EIO;
}
/*
* dump the response from server
* This should be after check errors which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -537,7 +529,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
if (!p9_is_proto_dotl(c)) {
char *ename;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -553,7 +545,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -587,12 +579,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
int8_t type;
char *ename = NULL;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
/*
* dump the response from server
* This should be after parse_header which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -607,13 +599,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
/* 7 = header size for RERROR; */
int inline_len = in_hdrlen - 7;
- len = req->rc->size - req->rc->offset;
+ len = req->rc.size - req->rc.offset;
if (len > (P9_ZC_HDR_SZ - 7)) {
err = -EFAULT;
goto out_err;
}
- ename = &req->rc->sdata[req->rc->offset];
+ ename = &req->rc.sdata[req->rc.offset];
if (len > inline_len) {
/* We have error in external buffer */
if (!copy_from_iter_full(ename + inline_len,
@@ -623,7 +615,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
}
ename = NULL;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -639,7 +631,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -672,7 +664,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
int16_t oldtag;
int err;
- err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+ err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
if (err)
return err;
@@ -686,11 +678,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
* if we haven't received a response for oldreq,
* remove it from the list
*/
- if (oldreq->status == REQ_STATUS_SENT)
+ if (oldreq->status == REQ_STATUS_SENT) {
if (c->trans_mod->cancelled)
c->trans_mod->cancelled(c, oldreq);
+ }
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return 0;
}
@@ -698,7 +691,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
int8_t type, int req_size,
const char *fmt, va_list ap)
{
- int tag, err;
+ int err;
struct p9_req_t *req;
p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
@@ -711,27 +704,22 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
return ERR_PTR(-EIO);
- tag = P9_NOTAG;
- if (type != P9_TVERSION) {
- tag = p9_idpool_get(c->tagpool);
- if (tag < 0)
- return ERR_PTR(-ENOMEM);
- }
-
- req = p9_tag_alloc(c, tag, req_size);
+ req = p9_tag_alloc(c, type, req_size);
if (IS_ERR(req))
return req;
/* marshall the data */
- p9pdu_prepare(req->tc, tag, type);
- err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
+ p9pdu_prepare(&req->tc, req->tc.tag, type);
+ err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
if (err)
goto reterr;
- p9pdu_finalize(c, req->tc);
- trace_9p_client_req(c, type, tag);
+ p9pdu_finalize(c, &req->tc);
+ trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
+ /* We have to put also the 2nd reference as it won't be used */
+ p9_req_put(req);
return ERR_PTR(err);
}
@@ -741,7 +729,7 @@ reterr:
* @type: type of request
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
*/
static struct p9_req_t *
@@ -766,6 +754,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
err = c->trans_mod->request(c, req);
if (err < 0) {
+ /* write won't happen */
+ p9_req_put(req);
if (err != -ERESTARTSYS && err != -EFAULT)
c->status = Disconnected;
goto recalc_sigpending;
@@ -813,11 +803,11 @@ recalc_sigpending:
goto reterr;
err = p9_check_errors(c, req);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -832,7 +822,7 @@ reterr:
* @hdrlen: reader header size, This is the size of response protocol data
* @fmt: protocol format string (see protocol.c)
*
- * Returns request structure (which client must free using p9_free_req)
+ * Returns request structure (which client must free using p9_tag_remove)
*/
static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
struct iov_iter *uidata,
@@ -895,11 +885,11 @@ recalc_sigpending:
goto reterr;
err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return ERR_PTR(safe_errno(err));
}
@@ -978,10 +968,10 @@ static int p9_client_version(struct p9_client *c)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
+ err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
p9_debug(P9_DEBUG_9P, "version error %d\n", err);
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
goto error;
}
@@ -1002,7 +992,7 @@ static int p9_client_version(struct p9_client *c)
error:
kfree(version);
- p9_free_req(c, req);
+ p9_tag_remove(c, req);
return err;
}
@@ -1020,20 +1010,18 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+ clnt->fcall_cache = NULL;
client_id = utsname()->nodename;
memcpy(clnt->name, client_id, strlen(client_id) + 1);
spin_lock_init(&clnt->lock);
idr_init(&clnt->fids);
-
- err = p9_tag_init(clnt);
- if (err < 0)
- goto free_client;
+ idr_init(&clnt->reqs);
err = parse_opts(options, clnt);
if (err < 0)
- goto destroy_tagpool;
+ goto free_client;
if (!clnt->trans_mod)
clnt->trans_mod = v9fs_get_default_trans();
@@ -1042,7 +1030,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
err = -EPROTONOSUPPORT;
p9_debug(P9_DEBUG_ERROR,
"No transport defined or default transport\n");
- goto destroy_tagpool;
+ goto free_client;
}
p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
@@ -1059,14 +1047,21 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto close_trans;
+ /* P9_HDRSZ + 4 is the smallest packet header we can have that is
+ * followed by data accessed from userspace by read
+ */
+ clnt->fcall_cache =
+ kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
+ 0, 0, P9_HDRSZ + 4,
+ clnt->msize - (P9_HDRSZ + 4),
+ NULL);
+
return clnt;
close_trans:
clnt->trans_mod->close(clnt);
put_trans:
v9fs_put_trans(clnt->trans_mod);
-destroy_tagpool:
- p9_idpool_destroy(clnt->tagpool);
free_client:
kfree(clnt);
return ERR_PTR(err);
@@ -1092,6 +1087,7 @@ void p9_client_destroy(struct p9_client *clnt)
p9_tag_cleanup(clnt);
+ kmem_cache_destroy(clnt->fcall_cache);
kfree(clnt);
}
EXPORT_SYMBOL(p9_client_destroy);
@@ -1135,10 +1131,10 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1147,7 +1143,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return fid;
error:
@@ -1192,13 +1188,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto clunk_fid;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);
@@ -1259,9 +1255,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1273,7 +1269,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
fid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1303,9 +1299,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1318,7 +1314,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
ofid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1348,9 +1344,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1363,7 +1359,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
fid->iounit = iounit;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1387,9 +1383,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1397,7 +1393,7 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
qid->type, (unsigned long long)qid->path, qid->version);
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1417,7 +1413,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newna
return PTR_ERR(req);
p9_debug(P9_DEBUG_9P, "<<< RLINK\n");
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return 0;
}
EXPORT_SYMBOL(p9_client_link);
@@ -1441,7 +1437,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)
p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
@@ -1476,7 +1472,7 @@ again:
p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
/*
* Fid is not valid even after a failed clunk
@@ -1510,7 +1506,7 @@ int p9_client_remove(struct p9_fid *fid)
p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
if (err == -ERESTARTSYS)
p9_client_clunk(fid);
@@ -1537,7 +1533,7 @@ int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags)
}
p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1585,11 +1581,11 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version,
+ *err = p9pdu_readf(&req->rc, clnt->proto_version,
"D", &count, &dataptr);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
break;
}
if (rsize < count) {
@@ -1599,7 +1595,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
if (!count) {
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
@@ -1609,7 +1605,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
offset += n;
if (n != count) {
*err = -EFAULT;
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
break;
}
} else {
@@ -1617,7 +1613,7 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
total += count;
offset += count;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
}
return total;
}
@@ -1658,10 +1654,10 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+ *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
break;
}
if (rsize < count) {
@@ -1671,7 +1667,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
iov_iter_advance(from, count);
total += count;
offset += count;
@@ -1702,10 +1698,10 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1722,7 +1718,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
from_kgid(&init_user_ns, ret->n_gid),
from_kuid(&init_user_ns, ret->n_muid));
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return ret;
error:
@@ -1755,10 +1751,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1783,7 +1779,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
ret->st_gen, ret->st_data_version);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return ret;
error:
@@ -1852,7 +1848,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1884,7 +1880,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1907,12 +1903,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
- &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
- &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+ &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+ &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto error;
}
@@ -1923,7 +1919,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
sb->fsid, (long int)sb->namelen);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1951,7 +1947,7 @@ int p9_client_rename(struct p9_fid *fid,
p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -1981,7 +1977,7 @@ int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n",
newdirfid->fid, new_name);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -2015,13 +2011,13 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
err = PTR_ERR(req);
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
- p9_free_req(clnt, req);
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_tag_remove(clnt, req);
goto clunk_fid;
}
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
attr_fid->fid, *attr_size);
return attr_fid;
@@ -2055,7 +2051,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -2103,9 +2099,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
if (rsize < count) {
@@ -2118,11 +2114,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (non_zc)
memmove(data, dataptr, count);
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return count;
free_and_error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
error:
return err;
}
@@ -2144,16 +2140,16 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
(unsigned long long)qid->path, qid->version);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2175,16 +2171,16 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
(unsigned long long)qid->path, qid->version);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2210,14 +2206,14 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
@@ -2241,18 +2237,18 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
- &glock->start, &glock->length, &glock->proc_id,
- &glock->client_id);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
+ &glock->start, &glock->length, &glock->proc_id,
+ &glock->client_id);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
"proc_id %d client_id %s\n", glock->type, glock->start,
glock->length, glock->proc_id, glock->client_id);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_getlock_dotl);
@@ -2271,14 +2267,25 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
error:
- p9_free_req(clnt, req);
+ p9_tag_remove(clnt, req);
return err;
}
EXPORT_SYMBOL(p9_client_readlink);
+
+int __init p9_client_init(void)
+{
+ p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
+ return p9_req_cache ? 0 : -ENOMEM;
+}
+
+void __exit p9_client_exit(void)
+{
+ kmem_cache_destroy(p9_req_cache);
+}
diff --git a/net/9p/mod.c b/net/9p/mod.c
index 253ba824a325..0da56d6af73b 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -171,11 +171,17 @@ void v9fs_put_trans(struct p9_trans_module *m)
*/
static int __init init_p9(void)
{
+ int ret;
+
+ ret = p9_client_init();
+ if (ret)
+ return ret;
+
p9_error_init();
pr_info("Installing 9P2000 support\n");
p9_trans_fd_init();
- return 0;
+ return ret;
}
/**
@@ -188,6 +194,7 @@ static void __exit exit_p9(void)
pr_info("Unloading 9P2000 support\n");
p9_trans_fd_exit();
+ p9_client_exit();
}
module_init(init_p9)
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 4a1e1dd30b52..462ba144cb39 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
void p9stat_free(struct p9_wstat *stbuf)
{
kfree(stbuf->name);
+ stbuf->name = NULL;
kfree(stbuf->uid);
+ stbuf->uid = NULL;
kfree(stbuf->gid);
+ stbuf->gid = NULL;
kfree(stbuf->muid);
+ stbuf->muid = NULL;
kfree(stbuf->extension);
+ stbuf->extension = NULL;
}
EXPORT_SYMBOL(p9stat_free);
@@ -566,9 +571,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)
if (ret) {
p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
trace_9p_protocol_dump(clnt, &fake_pdu);
+ return ret;
}
- return ret;
+ return fake_pdu.offset;
}
EXPORT_SYMBOL(p9stat_read);
@@ -617,13 +623,19 @@ int p9dirent_read(struct p9_client *clnt, char *buf, int len,
if (ret) {
p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
trace_9p_protocol_dump(clnt, &fake_pdu);
- goto out;
+ return ret;
}
- strcpy(dirent->d_name, nameptr);
+ ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name));
+ if (ret < 0) {
+ p9_debug(P9_DEBUG_ERROR,
+ "On the wire dirent name too long: %s\n",
+ nameptr);
+ kfree(nameptr);
+ return ret;
+ }
kfree(nameptr);
-out:
return fake_pdu.offset;
}
EXPORT_SYMBOL(p9dirent_read);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e2ef3c782c53..f868cf6fba79 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -131,7 +131,8 @@ struct p9_conn {
int err;
struct list_head req_list;
struct list_head unsent_req_list;
- struct p9_req_t *req;
+ struct p9_req_t *rreq;
+ struct p9_req_t *wreq;
char tmp_buf[7];
struct p9_fcall rc;
int wpos;
@@ -291,7 +292,6 @@ static void p9_read_work(struct work_struct *work)
__poll_t n;
int err;
struct p9_conn *m;
- int status = REQ_STATUS_ERROR;
m = container_of(work, struct p9_conn, rq);
@@ -322,7 +322,7 @@ static void p9_read_work(struct work_struct *work)
m->rc.offset += err;
/* header read in */
- if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
+ if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new header\n");
/* Header size */
@@ -346,23 +346,23 @@ static void p9_read_work(struct work_struct *work)
"mux %p pkt: size: %d bytes tag: %d\n",
m, m->rc.size, m->rc.tag);
- m->req = p9_tag_lookup(m->client, m->rc.tag);
- if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
+ m->rreq = p9_tag_lookup(m->client, m->rc.tag);
+ if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
m->rc.tag);
err = -EIO;
goto error;
}
- if (m->req->rc == NULL) {
+ if (!m->rreq->rc.sdata) {
p9_debug(P9_DEBUG_ERROR,
"No recv fcall for tag %d (req %p), disconnecting!\n",
- m->rc.tag, m->req);
- m->req = NULL;
+ m->rc.tag, m->rreq);
+ m->rreq = NULL;
err = -EIO;
goto error;
}
- m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ m->rc.sdata = m->rreq->rc.sdata;
memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
m->rc.capacity = m->rc.size;
}
@@ -370,20 +370,27 @@ static void p9_read_work(struct work_struct *work)
/* packet is read in
* not an else because some packets (like clunk) have no payload
*/
- if ((m->req) && (m->rc.offset == m->rc.capacity)) {
+ if ((m->rreq) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
- m->req->rc->size = m->rc.offset;
+ m->rreq->rc.size = m->rc.offset;
spin_lock(&m->client->lock);
- if (m->req->status != REQ_STATUS_ERROR)
- status = REQ_STATUS_RCVD;
- list_del(&m->req->req_list);
- /* update req->status while holding client->lock */
- p9_client_cb(m->client, m->req, status);
+ if (m->rreq->status == REQ_STATUS_SENT) {
+ list_del(&m->rreq->req_list);
+ p9_client_cb(m->client, m->rreq, REQ_STATUS_RCVD);
+ } else {
+ spin_unlock(&m->client->lock);
+ p9_debug(P9_DEBUG_ERROR,
+ "Request tag %d errored out while we were reading the reply\n",
+ m->rc.tag);
+ err = -EIO;
+ goto error;
+ }
spin_unlock(&m->client->lock);
m->rc.sdata = NULL;
m->rc.offset = 0;
m->rc.capacity = 0;
- m->req = NULL;
+ p9_req_put(m->rreq);
+ m->rreq = NULL;
}
end_clear:
@@ -469,9 +476,11 @@ static void p9_write_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
list_move_tail(&req->req_list, &m->req_list);
- m->wbuf = req->tc->sdata;
- m->wsize = req->tc->size;
+ m->wbuf = req->tc.sdata;
+ m->wsize = req->tc.size;
m->wpos = 0;
+ p9_req_get(req);
+ m->wreq = req;
spin_unlock(&m->client->lock);
}
@@ -492,8 +501,11 @@ static void p9_write_work(struct work_struct *work)
}
m->wpos += err;
- if (m->wpos == m->wsize)
+ if (m->wpos == m->wsize) {
m->wpos = m->wsize = 0;
+ p9_req_put(m->wreq);
+ m->wreq = NULL;
+ }
end_clear:
clear_bit(Wworksched, &m->wsched);
@@ -663,7 +675,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
- m, current, req->tc, req->tc->id);
+ m, current, &req->tc, req->tc.id);
if (m->err < 0)
return m->err;
@@ -694,6 +706,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
if (req->status == REQ_STATUS_UNSENT) {
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
+ p9_req_put(req);
ret = 0;
}
spin_unlock(&client->lock);
@@ -711,6 +724,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
spin_lock(&client->lock);
list_del(&req->req_list);
spin_unlock(&client->lock);
+ p9_req_put(req);
return 0;
}
@@ -862,7 +876,15 @@ static void p9_conn_destroy(struct p9_conn *m)
p9_mux_poll_stop(m);
cancel_work_sync(&m->rq);
+ if (m->rreq) {
+ p9_req_put(m->rreq);
+ m->rreq = NULL;
+ }
cancel_work_sync(&m->wq);
+ if (m->wreq) {
+ p9_req_put(m->wreq);
+ m->wreq = NULL;
+ }
p9_conn_cancel(m, -ECONNRESET);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b513cffeeb3c..119103bfa82e 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -122,7 +122,7 @@ struct p9_rdma_context {
dma_addr_t busa;
union {
struct p9_req_t *req;
- struct p9_fcall *rc;
+ struct p9_fcall rc;
};
};
@@ -274,8 +274,7 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
if (rdma)
rdma->state = P9_RDMA_CLOSED;
- if (c)
- c->status = Disconnected;
+ c->status = Disconnected;
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
@@ -320,8 +319,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto err_out;
- c->rc->size = wc->byte_len;
- err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+ c->rc.size = wc->byte_len;
+ err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
if (err)
goto err_out;
@@ -331,12 +330,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
/* Check that we have not yet received a reply for this request.
*/
- if (unlikely(req->rc)) {
+ if (unlikely(req->rc.sdata)) {
pr_err("Duplicate reply for request %d", tag);
goto err_out;
}
- req->rc = c->rc;
+ req->rc.size = c->rc.size;
+ req->rc.sdata = c->rc.sdata;
p9_client_cb(client, req, REQ_STATUS_RCVD);
out:
@@ -361,9 +361,10 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
ib_dma_unmap_single(rdma->cm_id->device,
- c->busa, c->req->tc->size,
+ c->busa, c->req->tc.size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
+ p9_req_put(c->req);
kfree(c);
}
@@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
struct ib_sge sge;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->rc->sdata, client->msize,
+ c->rc.sdata, client->msize,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
@@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
**/
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
- /* Got one ! */
- kfree(req->rc);
- req->rc = NULL;
+ /* Got one! */
+ p9_fcall_fini(&req->rc);
+ req->rc.sdata = NULL;
goto dont_need_post_recv;
} else {
/* We raced and lost. */
@@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
err = -ENOMEM;
goto recv_error;
}
- rpl_context->rc = req->rc;
+ rpl_context->rc.sdata = req->rc.sdata;
/*
* Post a receive buffer for this request. We need to ensure
@@ -475,11 +476,11 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
err = post_recv(client, rpl_context);
if (err) {
- p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+ p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
goto recv_error;
}
/* remove posted receive buffer from request structure */
- req->rc = NULL;
+ req->rc.sdata = NULL;
dont_need_post_recv:
/* Post the request */
@@ -491,7 +492,7 @@ dont_need_post_recv:
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->req->tc->sdata, c->req->tc->size,
+ c->req->tc.sdata, c->req->tc.size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
err = -EIO;
@@ -501,7 +502,7 @@ dont_need_post_recv:
c->cqe.done = send_done;
sge.addr = c->busa;
- sge.length = c->req->tc->size;
+ sge.length = c->req->tc.size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
@@ -544,7 +545,7 @@ dont_need_post_recv:
recv_error:
kfree(rpl_context);
spin_lock_irqsave(&rdma->req_lock, flags);
- if (rdma->state < P9_RDMA_CLOSING) {
+ if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
rdma->state = P9_RDMA_CLOSING;
spin_unlock_irqrestore(&rdma->req_lock, flags);
rdma_disconnect(rdma->cm_id);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09..eb596c2ed546 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -155,7 +155,7 @@ static void req_done(struct virtqueue *vq)
}
if (len) {
- req->rc->size = len;
+ req->rc.size = len;
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
@@ -207,6 +207,13 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
return 1;
}
+/* Reply won't come, so drop req ref */
+static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+ p9_req_put(req);
+ return 0;
+}
+
/**
* pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
* this takes a list of pages.
@@ -273,12 +280,12 @@ req_retry:
out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+ VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
@@ -404,6 +411,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct scatterlist *sgs[4];
size_t offs;
int need_drop = 0;
+ int kicked = 0;
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -411,29 +419,33 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
__le32 sz;
int n = p9_get_mapped_pages(chan, &out_pages, uodata,
outlen, &offs, &need_drop);
- if (n < 0)
- return n;
+ if (n < 0) {
+ err = n;
+ goto err_out;
+ }
out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != outlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
outlen = n;
}
/* The size field of the message must include the length of the
* header and the length of the data. We didn't actually know
* the length of the data until this point so add it in now.
*/
- sz = cpu_to_le32(req->tc->size + outlen);
- memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
+ sz = cpu_to_le32(req->tc.size + outlen);
+ memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
} else if (uidata) {
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
inlen, &offs, &need_drop);
- if (n < 0)
- return n;
+ if (n < 0) {
+ err = n;
+ goto err_out;
+ }
in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != inlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
inlen = n;
}
}
@@ -445,7 +457,7 @@ req_retry_pinned:
/* out data */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
@@ -464,7 +476,7 @@ req_retry_pinned:
* alloced memory and payload onto the user buffer.
*/
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+ VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
@@ -498,6 +510,7 @@ req_retry_pinned:
}
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
+ kicked = 1;
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
/*
@@ -518,6 +531,10 @@ err_out:
}
kvfree(in_pages);
kvfree(out_pages);
+ if (!kicked) {
+ /* reply won't come */
+ p9_req_put(req);
+ }
return err;
}
@@ -750,6 +767,7 @@ static struct p9_trans_module p9_virtio_trans = {
.request = p9_virtio_request,
.zc_request = p9_virtio_zc_request,
.cancel = p9_virtio_cancel,
+ .cancelled = p9_virtio_cancelled,
/*
* We leave one entry for input and one entry for response
* headers. We also skip one more entry to accomodate, address
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c2d54ac76bfd..e2fbf3677b9b 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
struct xen_9pfs_front_priv *priv = NULL;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
- u32 size = p9_req->tc->size;
+ u32 size = p9_req->tc.size;
struct xen_9pfs_dataring *ring;
int num;
@@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
if (!priv || priv->client != client)
return -EINVAL;
- num = p9_req->tc->tag % priv->num_rings;
+ num = p9_req->tc.tag % priv->num_rings;
ring = &priv->rings[num];
again:
@@ -176,7 +176,7 @@ again:
masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
- xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+ xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
&masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
p9_req->status = REQ_STATUS_SENT;
@@ -185,6 +185,7 @@ again:
ring->intf->out_prod = prod;
spin_unlock_irqrestore(&ring->lock, flags);
notify_remote_via_irq(ring->irq);
+ p9_req_put(p9_req);
return 0;
}
@@ -229,12 +230,12 @@ static void p9_xen_response(struct work_struct *work)
continue;
}
- memcpy(req->rc, &h, sizeof(h));
- req->rc->offset = 0;
+ memcpy(&req->rc, &h, sizeof(h));
+ req->rc.offset = 0;
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
/* Then, read the whole packet (including the header) */
- xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+ xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
masked_prod, &masked_cons,
XEN_9PFS_RING_SIZE);
@@ -391,8 +392,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
unsigned int max_rings, max_ring_order, len = 0;
versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
- if (!len)
- return -EINVAL;
+ if (IS_ERR(versions))
+ return PTR_ERR(versions);
if (strcmp(versions, "1")) {
kfree(versions);
return -EINVAL;
diff --git a/net/9p/util.c b/net/9p/util.c
deleted file mode 100644
index 55ad98277e85..000000000000
--- a/net/9p/util.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * net/9p/util.c
- *
- * This file contains some helper functions
- *
- * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
- * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to:
- * Free Software Foundation
- * 51 Franklin Street, Fifth Floor
- * Boston, MA 02111-1301 USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/parser.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <net/9p/9p.h>
-
-/**
- * struct p9_idpool - per-connection accounting for tag idpool
- * @lock: protects the pool
- * @pool: idr to allocate tag id from
- *
- */
-
-struct p9_idpool {
- spinlock_t lock;
- struct idr pool;
-};
-
-/**
- * p9_idpool_create - create a new per-connection id pool
- *
- */
-
-struct p9_idpool *p9_idpool_create(void)
-{
- struct p9_idpool *p;
-
- p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL);
- if (!p)
- return ERR_PTR(-ENOMEM);
-
- spin_lock_init(&p->lock);
- idr_init(&p->pool);
-
- return p;
-}
-EXPORT_SYMBOL(p9_idpool_create);
-
-/**
- * p9_idpool_destroy - create a new per-connection id pool
- * @p: idpool to destroy
- */
-
-void p9_idpool_destroy(struct p9_idpool *p)
-{
- idr_destroy(&p->pool);
- kfree(p);
-}
-EXPORT_SYMBOL(p9_idpool_destroy);
-
-/**
- * p9_idpool_get - allocate numeric id from pool
- * @p: pool to allocate from
- *
- * Bugs: This seems to be an awful generic function, should it be in idr.c with
- * the lock included in struct idr?
- */
-
-int p9_idpool_get(struct p9_idpool *p)
-{
- int i;
- unsigned long flags;
-
- idr_preload(GFP_NOFS);
- spin_lock_irqsave(&p->lock, flags);
-
- /* no need to store exactly p, we just need something non-null */
- i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT);
-
- spin_unlock_irqrestore(&p->lock, flags);
- idr_preload_end();
- if (i < 0)
- return -1;
-
- p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
- return i;
-}
-EXPORT_SYMBOL(p9_idpool_get);
-
-/**
- * p9_idpool_put - release numeric id from pool
- * @id: numeric id which is being released
- * @p: pool to release id into
- *
- * Bugs: This seems to be an awful generic function, should it be in idr.c with
- * the lock included in struct idr?
- */
-
-void p9_idpool_put(int id, struct p9_idpool *p)
-{
- unsigned long flags;
-
- p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p);
-
- spin_lock_irqsave(&p->lock, flags);
- idr_remove(&p->pool, id);
- spin_unlock_irqrestore(&p->lock, flags);
-}
-EXPORT_SYMBOL(p9_idpool_put);
-
-/**
- * p9_idpool_check - check if the specified id is available
- * @id: id to check
- * @p: pool to check
- */
-
-int p9_idpool_check(int id, struct p9_idpool *p)
-{
- return idr_find(&p->pool, id) != NULL;
-}
-EXPORT_SYMBOL(p9_idpool_check);
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 00deacdcb51c..cfd83c5521ae 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -49,18 +49,17 @@ static int bnep_sock_release(struct socket *sock)
return 0;
}
-static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_bnep_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
{
struct bnep_connlist_req cl;
struct bnep_connadd_req ca;
struct bnep_conndel_req cd;
struct bnep_conninfo ci;
struct socket *nsock;
- void __user *argp = (void __user *)arg;
__u32 supp_feat = BIT(BNEP_SETUP_RESPONSE);
int err;
- BT_DBG("cmd %x arg %lx", cmd, arg);
+ BT_DBG("cmd %x arg %p", cmd, argp);
switch (cmd) {
case BNEPCONNADD:
@@ -134,16 +133,22 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
return 0;
}
+static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return do_bnep_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
#ifdef CONFIG_COMPAT
static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
if (cmd == BNEPGETCONNLIST) {
struct bnep_connlist_req cl;
+ unsigned __user *p = argp;
u32 uci;
int err;
- if (get_user(cl.cnum, (u32 __user *) arg) ||
- get_user(uci, (u32 __user *) (arg + 4)))
+ if (get_user(cl.cnum, p) || get_user(uci, p + 1))
return -EFAULT;
cl.ci = compat_ptr(uci);
@@ -153,13 +158,13 @@ static int bnep_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
err = bnep_get_connlist(&cl);
- if (!err && put_user(cl.cnum, (u32 __user *) arg))
+ if (!err && put_user(cl.cnum, p))
err = -EFAULT;
return err;
}
- return bnep_sock_ioctl(sock, cmd, arg);
+ return do_bnep_sock_ioctl(sock, cmd, argp);
}
#endif
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index e08f28fadd65..defdd4871919 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -63,17 +63,16 @@ static int cmtp_sock_release(struct socket *sock)
return 0;
}
-static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
{
struct cmtp_connadd_req ca;
struct cmtp_conndel_req cd;
struct cmtp_connlist_req cl;
struct cmtp_conninfo ci;
struct socket *nsock;
- void __user *argp = (void __user *)arg;
int err;
- BT_DBG("cmd %x arg %lx", cmd, arg);
+ BT_DBG("cmd %x arg %p", cmd, argp);
switch (cmd) {
case CMTPCONNADD:
@@ -137,16 +136,22 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
return -EINVAL;
}
+static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return do_cmtp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
#ifdef CONFIG_COMPAT
static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
if (cmd == CMTPGETCONNLIST) {
struct cmtp_connlist_req cl;
+ u32 __user *p = argp;
u32 uci;
int err;
- if (get_user(cl.cnum, (u32 __user *) arg) ||
- get_user(uci, (u32 __user *) (arg + 4)))
+ if (get_user(cl.cnum, p) || get_user(uci, p + 1))
return -EFAULT;
cl.ci = compat_ptr(uci);
@@ -156,13 +161,13 @@ static int cmtp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
err = cmtp_get_connlist(&cl);
- if (!err && put_user(cl.cnum, (u32 __user *) arg))
+ if (!err && put_user(cl.cnum, p))
err = -EFAULT;
return err;
}
- return cmtp_sock_ioctl(sock, cmd, arg);
+ return do_cmtp_sock_ioctl(sock, cmd, argp);
}
#endif
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 3734dc1788b4..a442e21f3894 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -649,7 +649,7 @@ static void hidp_process_transmit(struct hidp_session *session,
}
static int hidp_setup_input(struct hidp_session *session,
- struct hidp_connadd_req *req)
+ const struct hidp_connadd_req *req)
{
struct input_dev *input;
int i;
@@ -748,7 +748,7 @@ EXPORT_SYMBOL_GPL(hidp_hid_driver);
/* This function sets up the hid device. It does not add it
to the HID system. That is done in hidp_add_connection(). */
static int hidp_setup_hid(struct hidp_session *session,
- struct hidp_connadd_req *req)
+ const struct hidp_connadd_req *req)
{
struct hid_device *hid;
int err;
@@ -807,7 +807,7 @@ fault:
/* initialize session devices */
static int hidp_session_dev_init(struct hidp_session *session,
- struct hidp_connadd_req *req)
+ const struct hidp_connadd_req *req)
{
int ret;
@@ -906,7 +906,7 @@ static void hidp_session_dev_work(struct work_struct *work)
static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
struct socket *ctrl_sock,
struct socket *intr_sock,
- struct hidp_connadd_req *req,
+ const struct hidp_connadd_req *req,
struct l2cap_conn *conn)
{
struct hidp_session *session;
@@ -1338,7 +1338,7 @@ static int hidp_verify_sockets(struct socket *ctrl_sock,
return 0;
}
-int hidp_connection_add(struct hidp_connadd_req *req,
+int hidp_connection_add(const struct hidp_connadd_req *req,
struct socket *ctrl_sock,
struct socket *intr_sock)
{
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index 8798492a6e99..6ef88d0a1919 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -122,7 +122,7 @@ struct hidp_connlist_req {
struct hidp_conninfo __user *ci;
};
-int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_connection_add(const struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
int hidp_connection_del(struct hidp_conndel_req *req);
int hidp_get_connlist(struct hidp_connlist_req *req);
int hidp_get_conninfo(struct hidp_conninfo *ci);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 1eaac01f85de..9f85a1943be9 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -46,9 +46,8 @@ static int hidp_sock_release(struct socket *sock)
return 0;
}
-static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+static int do_hidp_sock_ioctl(struct socket *sock, unsigned int cmd, void __user *argp)
{
- void __user *argp = (void __user *) arg;
struct hidp_connadd_req ca;
struct hidp_conndel_req cd;
struct hidp_connlist_req cl;
@@ -57,7 +56,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
struct socket *isock;
int err;
- BT_DBG("cmd %x arg %lx", cmd, arg);
+ BT_DBG("cmd %x arg %p", cmd, argp);
switch (cmd) {
case HIDPCONNADD:
@@ -122,6 +121,11 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
return -EINVAL;
}
+static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ return do_hidp_sock_ioctl(sock, cmd, (void __user *)arg);
+}
+
#ifdef CONFIG_COMPAT
struct compat_hidp_connadd_req {
int ctrl_sock; /* Connected control socket */
@@ -141,13 +145,15 @@ struct compat_hidp_connadd_req {
static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
+ int err;
+
if (cmd == HIDPGETCONNLIST) {
struct hidp_connlist_req cl;
+ u32 __user *p = argp;
u32 uci;
- int err;
- if (get_user(cl.cnum, (u32 __user *) arg) ||
- get_user(uci, (u32 __user *) (arg + 4)))
+ if (get_user(cl.cnum, p) || get_user(uci, p + 1))
return -EFAULT;
cl.ci = compat_ptr(uci);
@@ -157,39 +163,54 @@ static int hidp_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigne
err = hidp_get_connlist(&cl);
- if (!err && put_user(cl.cnum, (u32 __user *) arg))
+ if (!err && put_user(cl.cnum, p))
err = -EFAULT;
return err;
} else if (cmd == HIDPCONNADD) {
- struct compat_hidp_connadd_req ca;
- struct hidp_connadd_req __user *uca;
+ struct compat_hidp_connadd_req ca32;
+ struct hidp_connadd_req ca;
+ struct socket *csock;
+ struct socket *isock;
- uca = compat_alloc_user_space(sizeof(*uca));
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
- if (copy_from_user(&ca, (void __user *) arg, sizeof(ca)))
+ if (copy_from_user(&ca32, (void __user *) arg, sizeof(ca32)))
return -EFAULT;
- if (put_user(ca.ctrl_sock, &uca->ctrl_sock) ||
- put_user(ca.intr_sock, &uca->intr_sock) ||
- put_user(ca.parser, &uca->parser) ||
- put_user(ca.rd_size, &uca->rd_size) ||
- put_user(compat_ptr(ca.rd_data), &uca->rd_data) ||
- put_user(ca.country, &uca->country) ||
- put_user(ca.subclass, &uca->subclass) ||
- put_user(ca.vendor, &uca->vendor) ||
- put_user(ca.product, &uca->product) ||
- put_user(ca.version, &uca->version) ||
- put_user(ca.flags, &uca->flags) ||
- put_user(ca.idle_to, &uca->idle_to) ||
- copy_to_user(&uca->name[0], &ca.name[0], 128))
- return -EFAULT;
+ ca.ctrl_sock = ca32.ctrl_sock;
+ ca.intr_sock = ca32.intr_sock;
+ ca.parser = ca32.parser;
+ ca.rd_size = ca32.rd_size;
+ ca.rd_data = compat_ptr(ca32.rd_data);
+ ca.country = ca32.country;
+ ca.subclass = ca32.subclass;
+ ca.vendor = ca32.vendor;
+ ca.product = ca32.product;
+ ca.version = ca32.version;
+ ca.flags = ca32.flags;
+ ca.idle_to = ca32.idle_to;
+ memcpy(ca.name, ca32.name, 128);
+
+ csock = sockfd_lookup(ca.ctrl_sock, &err);
+ if (!csock)
+ return err;
- arg = (unsigned long) uca;
+ isock = sockfd_lookup(ca.intr_sock, &err);
+ if (!isock) {
+ sockfd_put(csock);
+ return err;
+ }
- /* Fall through. We don't actually write back any _changes_
- to the structure anyway, so there's no need to copy back
- into the original compat version */
+ err = hidp_connection_add(&ca, csock, isock);
+ if (!err && copy_to_user(argp, &ca32, sizeof(ca32)))
+ err = -EFAULT;
+
+ sockfd_put(csock);
+ sockfd_put(isock);
+
+ return err;
}
return hidp_sock_ioctl(sock, cmd, arg);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 024139b51d3a..6bac0d6b7b94 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1422,7 +1422,14 @@ static void br_multicast_query_received(struct net_bridge *br,
return;
br_multicast_update_query_timer(br, query, max_delay);
- br_multicast_mark_router(br, port);
+
+ /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
+ * the arrival port for IGMP Queries where the source address
+ * is 0.0.0.0 should not be added to router port list.
+ */
+ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
+ saddr->proto == htons(ETH_P_IPV6))
+ br_multicast_mark_router(br, port);
}
static void br_ip4_multicast_query(struct net_bridge *br,
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 02172c408ff2..5d6724cee38f 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -46,9 +46,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
goto fail;
}
- /* crypto_alloc_skcipher() allocates with GFP_KERNEL */
+ /* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */
noio_flag = memalloc_noio_save();
- key->tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
+ key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0);
memalloc_noio_restore(noio_flag);
if (IS_ERR(key->tfm)) {
ret = PTR_ERR(key->tfm);
@@ -56,7 +56,7 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
goto fail;
}
- ret = crypto_skcipher_setkey(key->tfm, key->key, key->len);
+ ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len);
if (ret)
goto fail;
@@ -136,7 +136,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
if (key) {
kfree(key->key);
key->key = NULL;
- crypto_free_skcipher(key->tfm);
+ crypto_free_sync_skcipher(key->tfm);
key->tfm = NULL;
}
}
@@ -216,7 +216,7 @@ static void teardown_sgtable(struct sg_table *sgt)
static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
void *buf, int buf_len, int in_len, int *pout_len)
{
- SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm);
struct sg_table sgt;
struct scatterlist prealloc_sg;
char iv[AES_BLOCK_SIZE] __aligned(8);
@@ -232,7 +232,7 @@ static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt,
return ret;
memcpy(iv, aes_iv, AES_BLOCK_SIZE);
- skcipher_request_set_tfm(req, key->tfm);
+ skcipher_request_set_sync_tfm(req, key->tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv);
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index bb45c7d43739..96ef4d860bc9 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -13,7 +13,7 @@ struct ceph_crypto_key {
struct ceph_timespec created;
int len;
void *key;
- struct crypto_skcipher *tfm;
+ struct crypto_sync_skcipher *tfm;
};
int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0a187196aeed..88e35830198c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -156,7 +156,6 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
/* Slab caches for frequently-allocated structures */
static struct kmem_cache *ceph_msg_cache;
-static struct kmem_cache *ceph_msg_data_cache;
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
@@ -235,23 +234,11 @@ static int ceph_msgr_slab_init(void)
if (!ceph_msg_cache)
return -ENOMEM;
- BUG_ON(ceph_msg_data_cache);
- ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
- if (ceph_msg_data_cache)
- return 0;
-
- kmem_cache_destroy(ceph_msg_cache);
- ceph_msg_cache = NULL;
-
- return -ENOMEM;
+ return 0;
}
static void ceph_msgr_slab_exit(void)
{
- BUG_ON(!ceph_msg_data_cache);
- kmem_cache_destroy(ceph_msg_data_cache);
- ceph_msg_data_cache = NULL;
-
BUG_ON(!ceph_msg_cache);
kmem_cache_destroy(ceph_msg_cache);
ceph_msg_cache = NULL;
@@ -1141,16 +1128,13 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
{
struct ceph_msg_data_cursor *cursor = &msg->cursor;
- struct ceph_msg_data *data;
BUG_ON(!length);
BUG_ON(length > msg->data_length);
- BUG_ON(list_empty(&msg->data));
+ BUG_ON(!msg->num_data_items);
- cursor->data_head = &msg->data;
cursor->total_resid = length;
- data = list_first_entry(&msg->data, struct ceph_msg_data, links);
- cursor->data = data;
+ cursor->data = msg->data;
__ceph_msg_data_cursor_init(cursor);
}
@@ -1231,8 +1215,7 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
- BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
- cursor->data = list_next_entry(cursor->data, links);
+ cursor->data++;
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
@@ -1248,9 +1231,6 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
- BUG_ON(!msg);
- BUG_ON(!data_len);
-
/* Initialize data cursor */
ceph_msg_data_cursor_init(msg, (size_t)data_len);
@@ -1590,7 +1570,7 @@ static int write_partial_message_data(struct ceph_connection *con)
dout("%s %p msg %p\n", __func__, con, msg);
- if (list_empty(&msg->data))
+ if (!msg->num_data_items)
return -EINVAL;
/*
@@ -2347,8 +2327,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- BUG_ON(!msg);
- if (list_empty(&msg->data))
+ if (!msg->num_data_items)
return -EIO;
if (do_datacrc)
@@ -3256,32 +3235,16 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
return false;
}
-static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
{
- struct ceph_msg_data *data;
-
- if (WARN_ON(!ceph_msg_data_type_valid(type)))
- return NULL;
-
- data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
- if (!data)
- return NULL;
-
- data->type = type;
- INIT_LIST_HEAD(&data->links);
-
- return data;
+ BUG_ON(msg->num_data_items >= msg->max_data_items);
+ return &msg->data[msg->num_data_items++];
}
static void ceph_msg_data_destroy(struct ceph_msg_data *data)
{
- if (!data)
- return;
-
- WARN_ON(!list_empty(&data->links));
if (data->type == CEPH_MSG_DATA_PAGELIST)
ceph_pagelist_release(data->pagelist);
- kmem_cache_free(ceph_msg_data_cache, data);
}
void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
@@ -3292,13 +3255,12 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
BUG_ON(!pages);
BUG_ON(!length);
- data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
- BUG_ON(!data);
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_PAGES;
data->pages = pages;
data->length = length;
data->alignment = alignment & ~PAGE_MASK;
- list_add_tail(&data->links, &msg->data);
msg->data_length += length;
}
EXPORT_SYMBOL(ceph_msg_data_add_pages);
@@ -3311,11 +3273,11 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
BUG_ON(!pagelist);
BUG_ON(!pagelist->length);
- data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
- BUG_ON(!data);
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_PAGELIST;
+ refcount_inc(&pagelist->refcnt);
data->pagelist = pagelist;
- list_add_tail(&data->links, &msg->data);
msg->data_length += pagelist->length;
}
EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
@@ -3326,12 +3288,11 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
{
struct ceph_msg_data *data;
- data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
- BUG_ON(!data);
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_BIO;
data->bio_pos = *bio_pos;
data->bio_length = length;
- list_add_tail(&data->links, &msg->data);
msg->data_length += length;
}
EXPORT_SYMBOL(ceph_msg_data_add_bio);
@@ -3342,11 +3303,10 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
{
struct ceph_msg_data *data;
- data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
- BUG_ON(!data);
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_BVECS;
data->bvec_pos = *bvec_pos;
- list_add_tail(&data->links, &msg->data);
msg->data_length += bvec_pos->iter.bi_size;
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
@@ -3355,8 +3315,8 @@ EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
* construct a new message with given type, size
* the new msg has a ref count of 1.
*/
-struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
- bool can_fail)
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+ gfp_t flags, bool can_fail)
{
struct ceph_msg *m;
@@ -3370,7 +3330,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
INIT_LIST_HEAD(&m->list_head);
kref_init(&m->kref);
- INIT_LIST_HEAD(&m->data);
/* front */
if (front_len) {
@@ -3385,6 +3344,15 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
}
m->front_alloc_len = m->front.iov_len = front_len;
+ if (max_data_items) {
+ m->data = kmalloc_array(max_data_items, sizeof(*m->data),
+ flags);
+ if (!m->data)
+ goto out2;
+
+ m->max_data_items = max_data_items;
+ }
+
dout("ceph_msg_new %p front %d\n", m, front_len);
return m;
@@ -3401,6 +3369,13 @@ out:
}
return NULL;
}
+EXPORT_SYMBOL(ceph_msg_new2);
+
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+ bool can_fail)
+{
+ return ceph_msg_new2(type, front_len, 0, flags, can_fail);
+}
EXPORT_SYMBOL(ceph_msg_new);
/*
@@ -3496,13 +3471,14 @@ static void ceph_msg_free(struct ceph_msg *m)
{
dout("%s %p\n", __func__, m);
kvfree(m->front.iov_base);
+ kfree(m->data);
kmem_cache_free(ceph_msg_cache, m);
}
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
- struct ceph_msg_data *data, *next;
+ int i;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
@@ -3515,11 +3491,8 @@ static void ceph_msg_release(struct kref *kref)
m->middle = NULL;
}
- list_for_each_entry_safe(data, next, &m->data, links) {
- list_del_init(&data->links);
- ceph_msg_data_destroy(data);
- }
- m->data_length = 0;
+ for (i = 0; i < m->num_data_items; i++)
+ ceph_msg_data_destroy(&m->data[i]);
if (m->pool)
ceph_msgpool_put(m->pool, m);
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 72571535883f..e3ecb80cd182 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -14,7 +14,8 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
struct ceph_msgpool *pool = arg;
struct ceph_msg *msg;
- msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+ msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items,
+ gfp_mask, true);
if (!msg) {
dout("msgpool_alloc %s failed\n", pool->name);
} else {
@@ -35,11 +36,13 @@ static void msgpool_free(void *element, void *arg)
}
int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
- int front_len, int size, bool blocking, const char *name)
+ int front_len, int max_data_items, int size,
+ const char *name)
{
dout("msgpool %s init\n", name);
pool->type = type;
pool->front_len = front_len;
+ pool->max_data_items = max_data_items;
pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
if (!pool->pool)
return -ENOMEM;
@@ -53,18 +56,21 @@ void ceph_msgpool_destroy(struct ceph_msgpool *pool)
mempool_destroy(pool->pool);
}
-struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
- int front_len)
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+ int max_data_items)
{
struct ceph_msg *msg;
- if (front_len > pool->front_len) {
- dout("msgpool_get %s need front %d, pool size is %d\n",
- pool->name, front_len, pool->front_len);
- WARN_ON(1);
+ if (front_len > pool->front_len ||
+ max_data_items > pool->max_data_items) {
+ pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n",
+ __func__, front_len, max_data_items, pool->name,
+ pool->front_len, pool->max_data_items);
+ WARN_ON_ONCE(1);
/* try to alloc a fresh message */
- return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+ return ceph_msg_new2(pool->type, front_len, max_data_items,
+ GFP_NOFS, false);
}
msg = mempool_alloc(pool->pool, GFP_NOFS);
@@ -80,6 +86,9 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
msg->front.iov_len = pool->front_len;
msg->hdr.front_len = cpu_to_le32(pool->front_len);
+ msg->data_length = 0;
+ msg->num_data_items = 0;
+
kref_init(&msg->kref); /* retake single ref */
mempool_free(msg, pool->pool);
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 60934bd8796c..d23a9f81f3d7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -126,6 +126,9 @@ static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
}
+/*
+ * Consumes @pages if @own_pages is true.
+ */
static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
struct page **pages, u64 length, u32 alignment,
bool pages_from_pool, bool own_pages)
@@ -138,6 +141,9 @@ static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
osd_data->own_pages = own_pages;
}
+/*
+ * Consumes a ref on @pagelist.
+ */
static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
struct ceph_pagelist *pagelist)
{
@@ -362,6 +368,8 @@ static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
num_pages = calc_pages_for((u64)osd_data->alignment,
(u64)osd_data->length);
ceph_release_page_vector(osd_data->pages, num_pages);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
+ ceph_pagelist_release(osd_data->pagelist);
}
ceph_osd_data_init(osd_data);
}
@@ -402,6 +410,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
case CEPH_OSD_OP_LIST_WATCHERS:
ceph_osd_data_release(&op->list_watchers.response_data);
break;
+ case CEPH_OSD_OP_COPY_FROM:
+ ceph_osd_data_release(&op->copy_from.osd_data);
+ break;
default:
break;
}
@@ -606,12 +617,15 @@ static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc)
return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
}
-int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp,
+ int num_request_data_items,
+ int num_reply_data_items)
{
struct ceph_osd_client *osdc = req->r_osdc;
struct ceph_msg *msg;
int msg_size;
+ WARN_ON(req->r_request || req->r_reply);
WARN_ON(ceph_oid_empty(&req->r_base_oid));
WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
@@ -633,9 +647,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
msg_size += 4 + 8; /* retry_attempt, features */
if (req->r_mempool)
- msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
+ msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size,
+ num_request_data_items);
else
- msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true);
+ msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size,
+ num_request_data_items, gfp, true);
if (!msg)
return -ENOMEM;
@@ -648,9 +664,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
msg_size += req->r_num_ops * sizeof(struct ceph_osd_op);
if (req->r_mempool)
- msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
+ msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size,
+ num_reply_data_items);
else
- msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true);
+ msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size,
+ num_reply_data_items, gfp, true);
if (!msg)
return -ENOMEM;
@@ -658,7 +676,6 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
return 0;
}
-EXPORT_SYMBOL(ceph_osdc_alloc_messages);
static bool osd_req_opcode_valid(u16 opcode)
{
@@ -671,6 +688,65 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
}
}
+static void get_num_data_items(struct ceph_osd_request *req,
+ int *num_request_data_items,
+ int *num_reply_data_items)
+{
+ struct ceph_osd_req_op *op;
+
+ *num_request_data_items = 0;
+ *num_reply_data_items = 0;
+
+ for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
+ switch (op->op) {
+ /* request */
+ case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_WRITEFULL:
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ case CEPH_OSD_OP_NOTIFY_ACK:
+ case CEPH_OSD_OP_COPY_FROM:
+ *num_request_data_items += 1;
+ break;
+
+ /* reply */
+ case CEPH_OSD_OP_STAT:
+ case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ *num_reply_data_items += 1;
+ break;
+
+ /* both */
+ case CEPH_OSD_OP_NOTIFY:
+ *num_request_data_items += 1;
+ *num_reply_data_items += 1;
+ break;
+ case CEPH_OSD_OP_CALL:
+ *num_request_data_items += 2;
+ *num_reply_data_items += 1;
+ break;
+
+ default:
+ WARN_ON(!osd_req_opcode_valid(op->op));
+ break;
+ }
+ }
+}
+
+/*
+ * oid, oloc and OSD op opcode(s) must be filled in before this function
+ * is called.
+ */
+int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+{
+ int num_request_data_items, num_reply_data_items;
+
+ get_num_data_items(req, &num_request_data_items, &num_reply_data_items);
+ return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items,
+ num_reply_data_items);
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_messages);
+
/*
* This is an osd op init function for opcodes that have no data or
* other information associated with them. It also serves as a
@@ -767,22 +843,19 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
EXPORT_SYMBOL(osd_req_op_extent_dup_last);
int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
- u16 opcode, const char *class, const char *method)
+ const char *class, const char *method)
{
- struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
- opcode, 0);
+ struct ceph_osd_req_op *op;
struct ceph_pagelist *pagelist;
size_t payload_len = 0;
size_t size;
- BUG_ON(opcode != CEPH_OSD_OP_CALL);
+ op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
- pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+ pagelist = ceph_pagelist_alloc(GFP_NOFS);
if (!pagelist)
return -ENOMEM;
- ceph_pagelist_init(pagelist);
-
op->cls.class_name = class;
size = strlen(class);
BUG_ON(size > (size_t) U8_MAX);
@@ -815,12 +888,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
- pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+ pagelist = ceph_pagelist_alloc(GFP_NOFS);
if (!pagelist)
return -ENOMEM;
- ceph_pagelist_init(pagelist);
-
payload_len = strlen(name);
op->xattr.name_len = payload_len;
ceph_pagelist_append(pagelist, name, payload_len);
@@ -900,12 +971,6 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
static u32 osd_req_encode_op(struct ceph_osd_op *dst,
const struct ceph_osd_req_op *src)
{
- if (WARN_ON(!osd_req_opcode_valid(src->op))) {
- pr_err("unrecognized osd opcode %d\n", src->op);
-
- return 0;
- }
-
switch (src->op) {
case CEPH_OSD_OP_STAT:
break;
@@ -955,6 +1020,14 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_CREATE:
case CEPH_OSD_OP_DELETE:
break;
+ case CEPH_OSD_OP_COPY_FROM:
+ dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
+ dst->copy_from.src_version =
+ cpu_to_le64(src->copy_from.src_version);
+ dst->copy_from.flags = src->copy_from.flags;
+ dst->copy_from.src_fadvise_flags =
+ cpu_to_le32(src->copy_from.src_fadvise_flags);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -1038,7 +1111,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ if (num_ops > 1)
+ /*
+ * This is a special case for ceph_writepages_start(), but it
+ * also covers ceph_uninline_data(). If more multi-op request
+ * use cases emerge, we will need a separate helper.
+ */
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
+ else
+ r = ceph_osdc_alloc_messages(req, GFP_NOFS);
if (r)
goto fail;
@@ -1845,48 +1926,55 @@ static bool should_plug_request(struct ceph_osd_request *req)
return true;
}
-static void setup_request_data(struct ceph_osd_request *req,
- struct ceph_msg *msg)
+/*
+ * Keep get_num_data_items() in sync with this function.
+ */
+static void setup_request_data(struct ceph_osd_request *req)
{
- u32 data_len = 0;
- int i;
+ struct ceph_msg *request_msg = req->r_request;
+ struct ceph_msg *reply_msg = req->r_reply;
+ struct ceph_osd_req_op *op;
- if (!list_empty(&msg->data))
+ if (req->r_request->num_data_items || req->r_reply->num_data_items)
return;
- WARN_ON(msg->data_length);
- for (i = 0; i < req->r_num_ops; i++) {
- struct ceph_osd_req_op *op = &req->r_ops[i];
-
+ WARN_ON(request_msg->data_length || reply_msg->data_length);
+ for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
switch (op->op) {
/* request */
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
WARN_ON(op->indata_len != op->extent.length);
- ceph_osdc_msg_data_add(msg, &op->extent.osd_data);
+ ceph_osdc_msg_data_add(request_msg,
+ &op->extent.osd_data);
break;
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
WARN_ON(op->indata_len != op->xattr.name_len +
op->xattr.value_len);
- ceph_osdc_msg_data_add(msg, &op->xattr.osd_data);
+ ceph_osdc_msg_data_add(request_msg,
+ &op->xattr.osd_data);
break;
case CEPH_OSD_OP_NOTIFY_ACK:
- ceph_osdc_msg_data_add(msg,
+ ceph_osdc_msg_data_add(request_msg,
&op->notify_ack.request_data);
break;
+ case CEPH_OSD_OP_COPY_FROM:
+ ceph_osdc_msg_data_add(request_msg,
+ &op->copy_from.osd_data);
+ break;
/* reply */
case CEPH_OSD_OP_STAT:
- ceph_osdc_msg_data_add(req->r_reply,
+ ceph_osdc_msg_data_add(reply_msg,
&op->raw_data_in);
break;
case CEPH_OSD_OP_READ:
- ceph_osdc_msg_data_add(req->r_reply,
+ ceph_osdc_msg_data_add(reply_msg,
&op->extent.osd_data);
break;
case CEPH_OSD_OP_LIST_WATCHERS:
- ceph_osdc_msg_data_add(req->r_reply,
+ ceph_osdc_msg_data_add(reply_msg,
&op->list_watchers.response_data);
break;
@@ -1895,25 +1983,23 @@ static void setup_request_data(struct ceph_osd_request *req,
WARN_ON(op->indata_len != op->cls.class_len +
op->cls.method_len +
op->cls.indata_len);
- ceph_osdc_msg_data_add(msg, &op->cls.request_info);
+ ceph_osdc_msg_data_add(request_msg,
+ &op->cls.request_info);
/* optional, can be NONE */
- ceph_osdc_msg_data_add(msg, &op->cls.request_data);
+ ceph_osdc_msg_data_add(request_msg,
+ &op->cls.request_data);
/* optional, can be NONE */
- ceph_osdc_msg_data_add(req->r_reply,
+ ceph_osdc_msg_data_add(reply_msg,
&op->cls.response_data);
break;
case CEPH_OSD_OP_NOTIFY:
- ceph_osdc_msg_data_add(msg,
+ ceph_osdc_msg_data_add(request_msg,
&op->notify.request_data);
- ceph_osdc_msg_data_add(req->r_reply,
+ ceph_osdc_msg_data_add(reply_msg,
&op->notify.response_data);
break;
}
-
- data_len += op->indata_len;
}
-
- WARN_ON(data_len != msg->data_length);
}
static void encode_pgid(void **p, const struct ceph_pg *pgid)
@@ -1961,7 +2047,7 @@ static void encode_request_partial(struct ceph_osd_request *req,
req->r_data_offset || req->r_snapc);
}
- setup_request_data(req, msg);
+ setup_request_data(req);
encode_spgid(&p, &req->r_t.spgid); /* actual spg */
ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */
@@ -3001,11 +3087,21 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
struct ceph_osd_client *osdc = lreq->osdc;
struct ceph_osd *osd;
+ down_write(&osdc->lock);
+ linger_register(lreq);
+ if (lreq->is_watch) {
+ lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
+ lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
+ } else {
+ lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
+ }
+
calc_target(osdc, &lreq->t, NULL, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
link_linger(osd, lreq);
send_linger(lreq);
+ up_write(&osdc->lock);
}
static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
@@ -4318,9 +4414,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
lreq->notify_id, notify_id);
} else if (!completion_done(&lreq->notify_finish_wait)) {
struct ceph_msg_data *data =
- list_first_entry_or_null(&msg->data,
- struct ceph_msg_data,
- links);
+ msg->num_data_items ? &msg->data[0] : NULL;
if (data) {
if (lreq->preply_pages) {
@@ -4476,6 +4570,23 @@ alloc_linger_request(struct ceph_osd_linger_request *lreq)
ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
+ return req;
+}
+
+static struct ceph_osd_request *
+alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
+{
+ struct ceph_osd_request *req;
+
+ req = alloc_linger_request(lreq);
+ if (!req)
+ return NULL;
+
+ /*
+ * Pass 0 for cookie because we don't know it yet, it will be
+ * filled in by linger_submit().
+ */
+ osd_req_op_watch_init(req, 0, 0, watch_opcode);
if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
ceph_osdc_put_request(req);
@@ -4514,27 +4625,19 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_linger_request(lreq);
+ lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
if (!lreq->reg_req) {
ret = -ENOMEM;
goto err_put_lreq;
}
- lreq->ping_req = alloc_linger_request(lreq);
+ lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
if (!lreq->ping_req) {
ret = -ENOMEM;
goto err_put_lreq;
}
- down_write(&osdc->lock);
- linger_register(lreq); /* before osd_req_op_* */
- osd_req_op_watch_init(lreq->reg_req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_WATCH);
- osd_req_op_watch_init(lreq->ping_req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_PING);
linger_submit(lreq);
- up_write(&osdc->lock);
-
ret = linger_reg_commit_wait(lreq);
if (ret) {
linger_cancel(lreq);
@@ -4599,11 +4702,10 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0);
- pl = kmalloc(sizeof(*pl), GFP_NOIO);
+ pl = ceph_pagelist_alloc(GFP_NOIO);
if (!pl)
return -ENOMEM;
- ceph_pagelist_init(pl);
ret = ceph_pagelist_encode_64(pl, notify_id);
ret |= ceph_pagelist_encode_64(pl, cookie);
if (payload) {
@@ -4641,12 +4743,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, oloc);
req->r_flags = CEPH_OSD_FLAG_READ;
- ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload,
+ payload_len);
if (ret)
goto out_put_req;
- ret = osd_req_op_notify_ack_init(req, 0, notify_id, cookie, payload,
- payload_len);
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
goto out_put_req;
@@ -4670,11 +4772,10 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
op->notify.cookie = cookie;
- pl = kmalloc(sizeof(*pl), GFP_NOIO);
+ pl = ceph_pagelist_alloc(GFP_NOIO);
if (!pl)
return -ENOMEM;
- ceph_pagelist_init(pl);
ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
ret |= ceph_pagelist_encode_32(pl, timeout);
ret |= ceph_pagelist_encode_32(pl, payload_len);
@@ -4733,29 +4834,30 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
goto out_put_lreq;
}
+ /*
+ * Pass 0 for cookie because we don't know it yet, it will be
+ * filled in by linger_submit().
+ */
+ ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
+ payload, payload_len);
+ if (ret)
+ goto out_put_lreq;
+
/* for notify_id */
pages = ceph_alloc_page_vector(1, GFP_NOIO);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out_put_lreq;
}
-
- down_write(&osdc->lock);
- linger_register(lreq); /* before osd_req_op_* */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, lreq->linger_id, 1,
- timeout, payload, payload_len);
- if (ret) {
- linger_unregister(lreq);
- up_write(&osdc->lock);
- ceph_release_page_vector(pages, 1);
- goto out_put_lreq;
- }
ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
response_data),
pages, PAGE_SIZE, 0, false, true);
- linger_submit(lreq);
- up_write(&osdc->lock);
+ ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
+ if (ret)
+ goto out_put_lreq;
+
+ linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
ret = linger_notify_finish_wait(lreq);
@@ -4881,10 +4983,6 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, oloc);
req->r_flags = CEPH_OSD_FLAG_READ;
- ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
- if (ret)
- goto out_put_req;
-
pages = ceph_alloc_page_vector(1, GFP_NOIO);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
@@ -4896,6 +4994,10 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
response_data),
pages, PAGE_SIZE, 0, false, true);
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
ceph_osdc_start_request(osdc, req, false);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
@@ -4958,11 +5060,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, oloc);
req->r_flags = flags;
- ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
- if (ret)
- goto out_put_req;
-
- ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+ ret = osd_req_op_cls_init(req, 0, class, method);
if (ret)
goto out_put_req;
@@ -4973,6 +5071,10 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
osd_req_op_cls_response_data_pages(req, 0, &resp_page,
*resp_len, 0, false, false);
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
ceph_osdc_start_request(osdc, req, false);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
@@ -5021,11 +5123,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
goto out_map;
err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
- PAGE_SIZE, 10, true, "osd_op");
+ PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op");
if (err < 0)
goto out_mempool;
err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
- PAGE_SIZE, 10, true, "osd_op_reply");
+ PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10,
+ "osd_op_reply");
if (err < 0)
goto out_msgpool;
@@ -5168,6 +5271,80 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
}
EXPORT_SYMBOL(ceph_osdc_writepages);
+static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+ u64 src_snapid, u64 src_version,
+ struct ceph_object_id *src_oid,
+ struct ceph_object_locator *src_oloc,
+ u32 src_fadvise_flags,
+ u32 dst_fadvise_flags,
+ u8 copy_from_flags)
+{
+ struct ceph_osd_req_op *op;
+ struct page **pages;
+ void *p, *end;
+
+ pages = ceph_alloc_page_vector(1, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+ op->copy_from.snapid = src_snapid;
+ op->copy_from.src_version = src_version;
+ op->copy_from.flags = copy_from_flags;
+ op->copy_from.src_fadvise_flags = src_fadvise_flags;
+
+ p = page_address(pages[0]);
+ end = p + PAGE_SIZE;
+ ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
+ encode_oloc(&p, end, src_oloc);
+ op->indata_len = PAGE_SIZE - (end - p);
+
+ ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
+ op->indata_len, 0, false, true);
+ return 0;
+}
+
+int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
+ u64 src_snapid, u64 src_version,
+ struct ceph_object_id *src_oid,
+ struct ceph_object_locator *src_oloc,
+ u32 src_fadvise_flags,
+ struct ceph_object_id *dst_oid,
+ struct ceph_object_locator *dst_oloc,
+ u32 dst_fadvise_flags,
+ u8 copy_from_flags)
+{
+ struct ceph_osd_request *req;
+ int ret;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+ ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+ ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+ ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
+ src_oloc, src_fadvise_flags,
+ dst_fadvise_flags, copy_from_flags);
+ if (ret)
+ goto out;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+
+out:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_copy_from);
+
int __init ceph_osdc_setup(void)
{
size_t size = sizeof(struct ceph_osd_request) +
@@ -5295,7 +5472,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
u32 front_len = le32_to_cpu(hdr->front_len);
u32 data_len = le32_to_cpu(hdr->data_len);
- m = ceph_msg_new(type, front_len, GFP_NOIO, false);
+ m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false);
if (!m)
return NULL;
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 2ea0564771d2..65e34f78b05d 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -6,6 +6,26 @@
#include <linux/highmem.h>
#include <linux/ceph/pagelist.h>
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags)
+{
+ struct ceph_pagelist *pl;
+
+ pl = kmalloc(sizeof(*pl), gfp_flags);
+ if (!pl)
+ return NULL;
+
+ INIT_LIST_HEAD(&pl->head);
+ pl->mapped_tail = NULL;
+ pl->length = 0;
+ pl->room = 0;
+ INIT_LIST_HEAD(&pl->free_list);
+ pl->num_pages_free = 0;
+ refcount_set(&pl->refcnt, 1);
+
+ return pl;
+}
+EXPORT_SYMBOL(ceph_pagelist_alloc);
+
static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
{
if (pl->mapped_tail) {
diff --git a/net/compat.c b/net/compat.c
index 3b2105f6549d..47a614b370cd 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -812,21 +812,21 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len
static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
- struct compat_timespec __user *timeout)
+ struct old_timespec32 __user *timeout)
{
int datagrams;
- struct timespec ktspec;
+ struct timespec64 ktspec;
if (timeout == NULL)
return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, NULL);
- if (compat_get_timespec(&ktspec, timeout))
+ if (compat_get_timespec64(&ktspec, timeout))
return -EFAULT;
datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, &ktspec);
- if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
+ if (datagrams > 0 && compat_put_timespec64(&ktspec, timeout))
datagrams = -EFAULT;
return datagrams;
@@ -834,7 +834,7 @@ static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags,
- struct compat_timespec __user *, timeout)
+ struct old_timespec32 __user *, timeout)
{
return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 022ad73d6253..0ffcbdd55fa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3272,7 +3272,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
}
skb = next;
- if (netif_xmit_stopped(txq) && skb) {
+ if (netif_tx_queue_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
@@ -5457,7 +5457,7 @@ static void gro_flush_oldest(struct list_head *head)
/* Do not adjust napi->gro_hash[].count, caller is adding a new
* SKB to the chain.
*/
- list_del(&oldest->list);
+ skb_list_del_init(oldest);
napi_gro_complete(oldest);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 35c6933c2622..e521c5ebc7d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5264,8 +5264,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pull_data_proto;
case BPF_FUNC_msg_push_data:
return &bpf_msg_push_data_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5296,8 +5294,6 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
return &bpf_sk_lookup_tcp_proto;
@@ -5496,7 +5492,13 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
+ case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ if (!capable(CAP_SYS_ADMIN))
+ return false;
+ break;
}
+
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
@@ -5638,6 +5640,15 @@ static bool sock_filter_is_valid_access(int off, int size,
prog->expected_attach_type);
}
+static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ /* Neither direct read nor direct write requires any preliminary
+ * action.
+ */
+ return 0;
+}
+
static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog, int drop_verdict)
{
@@ -7204,6 +7215,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .gen_prologue = bpf_noop_prologue,
};
const struct bpf_prog_ops xdp_prog_ops = {
@@ -7302,6 +7314,7 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
.get_func_proto = sk_msg_func_proto,
.is_valid_access = sk_msg_is_valid_access,
.convert_ctx_access = sk_msg_convert_ctx_access,
+ .gen_prologue = bpf_noop_prologue,
};
const struct bpf_prog_ops sk_msg_prog_ops = {
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ee605d9d8bd4..41954e42a2de 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2364,7 +2364,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
if (!master_idx)
return false;
- master = netdev_master_upper_dev_get(dev);
+ master = dev ? netdev_master_upper_dev_get(dev) : NULL;
if (!master || master->ifindex != master_idx)
return true;
@@ -2373,7 +2373,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
- if (filter_idx && dev->ifindex != filter_idx)
+ if (filter_idx && (!dev || dev->ifindex != filter_idx))
return true;
return false;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 5e4f04004a49..7bf833598615 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -106,6 +106,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
iterate_fd(p->files, 0, update_classid_sock,
(void *)(unsigned long)cs->classid);
task_unlock(p);
+ cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5da9552b186b..2b9fdbc43205 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np)
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+ if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+ !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
err = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f679c7a7d761..33d9227a8b80 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = 0;
}
ret = dumpit(skb, cb);
- if (ret < 0)
+ if (ret)
break;
}
cb->family = idx;
@@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 946de0e24c87..b4ee5c8b928f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4944,6 +4944,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
*
* This is a helper to do that correctly considering GSO_BY_FRAGS.
*
+ * @skb: GSO skb
+ *
* @seg_len: The segmented length (from skb_gso_*_seglen). In the
* GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
*
diff --git a/net/core/sock.c b/net/core/sock.c
index 6fcc4bc07d19..080a880a1761 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol)
#ifdef CONFIG_INET
if (family == AF_INET &&
+ protocol != IPPROTO_RAW &&
!rcu_access_pointer(inet_protos[protocol]))
return -ENOENT;
#endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..37b4667128a3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
return ret;
}
-# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
-# endif
#endif
static struct ctl_table net_core_table[] = {
@@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = {
.extra2 = &one,
},
# endif
+ {
+ .procname = "bpf_jit_limit",
+ .data = &bpf_jit_limit,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &one,
+ },
#endif
{
.procname = "netdev_tstamp_prequeue",
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9250b309c742..a34602ae27de 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1704,6 +1704,7 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
if (IS_ERR(net)) {
+ fillargs->netnsid = -1;
NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
return PTR_ERR(net);
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da39446da2d..765b2b32c4a4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ)
#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
+#define IGMP_QUERY_INTERVAL (125*HZ)
#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2
-
#define IGMP_INITIAL_REPORT_DELAY (1)
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
in_dev->mr_v1_seen = jiffies +
- IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
in_dev->mr_v2_seen = jiffies +
- IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
}
/* cancel the interface change timer */
in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!max_delay)
max_delay = 1; /* can't mod w/ 0 */
in_dev->mr_maxdelay = max_delay;
- if (ih3->qrv)
- in_dev->mr_qrv = ih3->qrv;
+
+ /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+ in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+ * The number of seconds represented by the [Query Response
+ * Interval] must be less than the [Query Interval].
+ */
+ if (in_dev->mr_qri >= in_dev->mr_qi)
+ in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
if (!group) { /* general query */
if (ih3->nsrcs)
return true; /* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
}
-void ip_mc_init_dev(struct in_device *in_dev)
-{
#ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
struct net *net = dev_net(in_dev->dev);
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+ in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
#endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
#endif
+ ip_mc_reset(in_dev);
spin_lock_init(&in_dev->mc_tomb_lock);
}
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
- struct net *net = dev_net(in_dev->dev);
-#endif
ASSERT_RTNL();
-#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+ ip_mc_reset(in_dev);
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
for_each_pmc_rtnl(in_dev, pmc) {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f5c9ef2586de..411dd7a90046 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -19,7 +19,7 @@
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9b0158fa431f..d6ee343fdb86 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
- return skb;
- if (pskb_trim_rcsum(skb, netoff + len))
- return skb;
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ if (pskb_trim_rcsum(skb, netoff + len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(net, skb, user))
return NULL;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc..fffcc130900e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
optname < BPFILTER_IPT_SET_MAX)
err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1834818ed07b..9e6bc4d6daa7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -262,7 +262,7 @@
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/cache.h>
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index b7918d4caa30..3b45fe530f91 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -145,6 +145,7 @@ msg_bytes_ready:
ret = err;
goto out;
}
+ copied = -EAGAIN;
}
ret = copied;
out:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7e048288fcab..1976fddb9e00 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -81,7 +81,7 @@
#include <linux/uaccess.h>
#include <asm/ioctls.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/types.h>
@@ -609,8 +609,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, 0,
- udptable, NULL);
+ iph->saddr, uh->source, skb->dev->ifindex,
+ inet_sdif(skb), udptable, NULL);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d9ad986c7b2c..5cbb9be05295 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -42,6 +42,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
rcu_read_lock();
if (req->sdiag_family == AF_INET)
+ /* src and dst are swapped for historical reasons */
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7eb09c86fa13..63a808d5af15 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5058,6 +5058,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
fillargs->netnsid = nla_get_s32(tb[i]);
net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
if (IS_ERR(net)) {
+ fillargs->netnsid = -1;
NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
return PTR_ERR(net);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..f0cd291034f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = ipv6_anycast_init();
+ if (err)
+ goto ipv6_anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ ipv6_frag_fail:
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ ipv6_anycast_cleanup();
+ipv6_anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..94999058e110 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+ unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+ spin_lock(&acaddr_hash_lock);
+ hlist_del_init_rcu(&aca->aca_addr_lst);
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
}
+static void aca_free_rcu(struct rcu_head *h)
+{
+ struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+ fib6_info_release(aca->aca_rt);
+ kfree(aca);
+}
+
static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
- fib6_info_release(ac->aca_rt);
- kfree(ac);
+ call_rcu(&ac->rcu, aca_free_rcu);
}
}
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
aca->aca_addr = *addr;
fib6_info_hold(f6i);
aca->aca_rt = f6i;
+ INIT_HLIST_NODE(&aca->aca_addr_lst);
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
+ ipv6_add_acaddr_hash(net, aca);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
else
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
+
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct net_device *nh_dev;
+ struct ifacaddr6 *aca;
bool found = false;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+ aca_addr_lst) {
+ nh_dev = fib6_info_nh_dev(aca->aca_rt);
+ if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+ continue;
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
+
+/* Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b8bc008b53b..ae3786132c23 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* fib entries are never clones */
if (arg.filter.flags & RTM_F_CLONED)
- return skb->len;
+ goto out;
w = (void *)cb->args[2];
if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (arg.filter.dump_all_families)
- return skb->len;
+ goto out;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
return -ENOENT;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a25cfdd47c89..659ecf4e4b3c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb)
return 0;
}
- memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
-
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
+ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
ndisc_recv_ns(skb);
break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 06d17ff3562f..d2d97d07ef27 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), 0, udptable, skb);
+ inet6_iif(skb), inet6_sdif(skb), udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 2fb703d70803..7e29f88dbf6a 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -146,18 +146,18 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
goto err_tfm;
}
- key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+ key->tfm0 = crypto_alloc_sync_skcipher("ctr(aes)", 0, 0);
if (IS_ERR(key->tfm0))
goto err_tfm;
- if (crypto_skcipher_setkey(key->tfm0, template->key,
+ if (crypto_sync_skcipher_setkey(key->tfm0, template->key,
IEEE802154_LLSEC_KEY_SIZE))
goto err_tfm0;
return key;
err_tfm0:
- crypto_free_skcipher(key->tfm0);
+ crypto_free_sync_skcipher(key->tfm0);
err_tfm:
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
if (key->tfm[i])
@@ -177,7 +177,7 @@ static void llsec_key_release(struct kref *ref)
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
crypto_free_aead(key->tfm[i]);
- crypto_free_skcipher(key->tfm0);
+ crypto_free_sync_skcipher(key->tfm0);
kzfree(key);
}
@@ -622,7 +622,7 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
{
u8 iv[16];
struct scatterlist src;
- SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
int err, datalen;
unsigned char *data;
@@ -632,7 +632,7 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
datalen = skb_tail_pointer(skb) - data;
sg_init_one(&src, data, datalen);
- skcipher_request_set_tfm(req, key->tfm0);
+ skcipher_request_set_sync_tfm(req, key->tfm0);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &src, &src, datalen, iv);
err = crypto_skcipher_encrypt(req);
@@ -840,7 +840,7 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
unsigned char *data;
int datalen;
struct scatterlist src;
- SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
int err;
llsec_geniv(iv, dev_addr, &hdr->sec);
@@ -849,7 +849,7 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
sg_init_one(&src, data, datalen);
- skcipher_request_set_tfm(req, key->tfm0);
+ skcipher_request_set_sync_tfm(req, key->tfm0);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &src, &src, datalen, iv);
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 6f3b658e3279..8be46d74dc39 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -29,7 +29,7 @@ struct mac802154_llsec_key {
/* one tfm for each authsize (4/8/16) */
struct crypto_aead *tfm[3];
- struct crypto_skcipher *tfm0;
+ struct crypto_sync_skcipher *tfm0;
struct kref ref;
};
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 6bec37ab4472..a4660c48ff01 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&info->labels.mask);
if (err)
return err;
- } else if (labels_nonzero(&info->labels.mask)) {
+ } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
&info->labels.mask);
if (err)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a70097ecf33c..865ecef68196 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
* is already present */
if (mac_proto != MAC_PROTO_NONE)
return -EINVAL;
- mac_proto = MAC_PROTO_NONE;
+ mac_proto = MAC_PROTO_ETHERNET;
break;
case OVS_ACTION_ATTR_POP_ETH:
@@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
if (vlan_tci & htons(VLAN_TAG_PRESENT))
return -EINVAL;
- mac_proto = MAC_PROTO_ETHERNET;
+ mac_proto = MAC_PROTO_NONE;
break;
case OVS_ACTION_ATTR_PUSH_NSH:
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 0a7c49e8e053..bc628acf4f4f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -435,7 +435,7 @@ struct rxrpc_connection {
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
struct key *server_key; /* security for this service */
- struct crypto_skcipher *cipher; /* encryption handle */
+ struct crypto_sync_skcipher *cipher; /* encryption handle */
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
unsigned long events;
@@ -611,6 +611,7 @@ struct rxrpc_call {
* not hard-ACK'd packet follows this.
*/
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+ u16 tx_backoff; /* Delay to insert due to Tx failure */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 8e7434e92097..468efc3660c0 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
else
ack_at = expiry;
+ ack_at += READ_ONCE(call->tx_backoff);
ack_at += now;
if (time_before(ack_at, call->ack_at)) {
WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work)
container_of(work, struct rxrpc_call, processor);
rxrpc_serial_t *send_ack;
unsigned long now, next, t;
+ unsigned int iterations = 0;
rxrpc_see_call(call);
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work)
call->debug_id, rxrpc_call_states[call->state], call->events);
recheck_state:
+ /* Limit the number of times we do this before returning to the manager */
+ iterations++;
+ if (iterations > 5)
+ goto requeue;
+
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
rxrpc_send_abort_packet(call);
goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
/* other events may have been raised since we started checking */
- if (call->events && call->state < RXRPC_CALL_COMPLETE) {
- __rxrpc_queue_call(call);
- goto out;
- }
+ if (call->events && call->state < RXRPC_CALL_COMPLETE)
+ goto requeue;
out_put:
rxrpc_put_call(call, rxrpc_call_put);
out:
_leave("");
+ return;
+
+requeue:
+ __rxrpc_queue_call(call);
+ goto out;
}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 189418888839..736aa9281100 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
static const char rxrpc_keepalive_string[] = "";
/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+ if (ret < 0) {
+ u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+ if (tx_backoff < HZ)
+ WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ } else {
+ WRITE_ONCE(call->tx_backoff, 0);
+ }
+}
+
+/*
* Arrange for a keepalive ping a certain time after we last transmitted. This
* lets the far side know we're still interested in this call and helps keep
* the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
else
trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
rxrpc_tx_point_call_ack);
+ rxrpc_tx_backoff(call, ret);
if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
rxrpc_propose_ACK(call, pkt->ack.reason,
ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
- true, true,
+ false, true,
rxrpc_propose_ack_retry_tx);
} else {
spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
else
trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
rxrpc_tx_point_call_abort);
-
+ rxrpc_tx_backoff(call, ret);
rxrpc_put_connection(conn);
return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_nofrag);
+ rxrpc_tx_backoff(call, ret);
if (ret == -EMSGSIZE)
goto send_fragmentable;
@@ -445,9 +462,18 @@ done:
rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
rxrpc_timer_set_for_normal);
}
- }
- rxrpc_set_keepalive(call);
+ rxrpc_set_keepalive(call);
+ } else {
+ /* Cancel the call if the initial transmission fails,
+ * particularly if that's due to network routing issues that
+ * aren't going away anytime soon. The layer above can arrange
+ * the retransmission.
+ */
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_USER_ABORT, ret);
+ }
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_frag);
+ rxrpc_tx_backoff(call, ret);
up_write(&conn->params.local->defrag_sem);
goto done;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index cea16838d588..cbef9ea43dec 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -46,7 +46,7 @@ struct rxkad_level2_hdr {
* alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
* packets
*/
-static struct crypto_skcipher *rxkad_ci;
+static struct crypto_sync_skcipher *rxkad_ci;
static DEFINE_MUTEX(rxkad_ci_mutex);
/*
@@ -54,7 +54,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex);
*/
static int rxkad_init_connection_security(struct rxrpc_connection *conn)
{
- struct crypto_skcipher *ci;
+ struct crypto_sync_skcipher *ci;
struct rxrpc_key_token *token;
int ret;
@@ -63,14 +63,14 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
token = conn->params.key->payload.data[0];
conn->security_ix = token->security_index;
- ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
if (IS_ERR(ci)) {
_debug("no cipher");
ret = PTR_ERR(ci);
goto error;
}
- if (crypto_skcipher_setkey(ci, token->kad->session_key,
+ if (crypto_sync_skcipher_setkey(ci, token->kad->session_key,
sizeof(token->kad->session_key)) < 0)
BUG();
@@ -104,7 +104,7 @@ error:
static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
{
struct rxrpc_key_token *token;
- SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct scatterlist sg;
struct rxrpc_crypt iv;
__be32 *tmpbuf;
@@ -128,7 +128,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn)
tmpbuf[3] = htonl(conn->security_ix);
sg_init_one(&sg, tmpbuf, tmpsize);
- skcipher_request_set_tfm(req, conn->cipher);
+ skcipher_request_set_sync_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
crypto_skcipher_encrypt(req);
@@ -167,7 +167,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
memset(&iv, 0, sizeof(iv));
sg_init_one(&sg, sechdr, 8);
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
@@ -212,7 +212,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
memcpy(&iv, token->kad->session_key, sizeof(iv));
sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x);
crypto_skcipher_encrypt(req);
@@ -250,7 +250,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
void *sechdr)
{
struct rxrpc_skb_priv *sp;
- SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg;
u32 x, y;
@@ -279,7 +279,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
call->crypto_buf[1] = htonl(x);
sg_init_one(&sg, call->crypto_buf, 8);
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
@@ -352,7 +352,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
crypto_skcipher_decrypt(req);
@@ -450,7 +450,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
token = call->conn->params.key->payload.data[0];
memcpy(&iv, token->kad->session_key, sizeof(iv));
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, len, iv.x);
crypto_skcipher_decrypt(req);
@@ -506,7 +506,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
unsigned int offset, unsigned int len,
rxrpc_seq_t seq, u16 expected_cksum)
{
- SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg;
bool aborted;
@@ -529,7 +529,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
call->crypto_buf[1] = htonl(x);
sg_init_one(&sg, call->crypto_buf, 8);
- skcipher_request_set_tfm(req, call->conn->cipher);
+ skcipher_request_set_sync_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
crypto_skcipher_encrypt(req);
@@ -755,7 +755,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
struct rxkad_response *resp,
const struct rxkad_key *s2)
{
- SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[1];
@@ -764,7 +764,7 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
sg_init_table(sg, 1);
sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
- skcipher_request_set_tfm(req, conn->cipher);
+ skcipher_request_set_sync_tfm(req, conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
crypto_skcipher_encrypt(req);
@@ -1021,7 +1021,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
struct rxkad_response *resp,
const struct rxrpc_crypt *session_key)
{
- SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
struct scatterlist sg[1];
struct rxrpc_crypt iv;
@@ -1031,7 +1031,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
ASSERT(rxkad_ci != NULL);
mutex_lock(&rxkad_ci_mutex);
- if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
+ if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
sizeof(*session_key)) < 0)
BUG();
@@ -1039,7 +1039,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
sg_init_table(sg, 1);
sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
- skcipher_request_set_tfm(req, rxkad_ci);
+ skcipher_request_set_sync_tfm(req, rxkad_ci);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
crypto_skcipher_decrypt(req);
@@ -1218,7 +1218,7 @@ static void rxkad_clear(struct rxrpc_connection *conn)
_enter("");
if (conn->cipher)
- crypto_free_skcipher(conn->cipher);
+ crypto_free_sync_skcipher(conn->cipher);
}
/*
@@ -1228,7 +1228,7 @@ static int rxkad_init(void)
{
/* pin the cipher we need so that the crypto layer doesn't invoke
* keventd to go get it */
- rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+ rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
return PTR_ERR_OR_ZERO(rxkad_ci);
}
@@ -1238,7 +1238,7 @@ static int rxkad_init(void)
static void rxkad_exit(void)
{
if (rxkad_ci)
- crypto_free_skcipher(rxkad_ci);
+ crypto_free_sync_skcipher(rxkad_ci);
}
/*
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index cbe4831f46f4..4a042abf844c 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -413,7 +413,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
if (tb[TCA_GRED_LIMIT] != NULL)
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- return gred_change_table_def(sch, opt);
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
if (tb[TCA_GRED_PARMS] == NULL ||
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a827a1f562bf..6a28b96e779e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -499,8 +499,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer)
{
- struct list_head *pos;
- struct sctp_transport *transport;
+ struct sctp_transport *transport;
+ struct list_head *pos;
+ struct sctp_chunk *ch;
pr_debug("%s: association:%p addr:%pISpc\n",
__func__, asoc, &peer->ipaddr.sa);
@@ -564,7 +565,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
*/
if (!list_empty(&peer->transmitted)) {
struct sctp_transport *active = asoc->peer.active_path;
- struct sctp_chunk *ch;
/* Reset the transport of each chunk on this list */
list_for_each_entry(ch, &peer->transmitted,
@@ -586,6 +586,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
sctp_transport_hold(active);
}
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+ if (ch->transport == peer)
+ ch->transport = NULL;
+
asoc->peer.transport_count--;
sctp_transport_free(peer);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 9cb854b05342..c37e1c2dec9d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
- sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
+ sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
}
/* Free the outqueue structure and any related pending chunks.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e948db29ab53..9b277bd36d1a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,7 +46,7 @@
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/slab.h>
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fc0386e8ff23..739f3e50120d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7083,14 +7083,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
if (!asoc)
goto out;
- if (policy & SCTP_PR_SCTP_ALL) {
+ if (policy == SCTP_PR_SCTP_ALL) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7142,7 +7143,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index e871368500e3..18daebcef181 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -122,22 +122,17 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn)
sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
-/* Unregister connection and trigger lgr freeing if applicable
+/* Unregister connection from lgr
*/
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- int reduced = 0;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
- reduced = 1;
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- if (!reduced || lgr->conns_num)
- return;
- smc_lgr_schedule_free_work(lgr);
}
/* Send delete link, either as client to request the initiation
@@ -291,7 +286,8 @@ out:
return rc;
}
-static void smc_buf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn,
+ struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
@@ -301,8 +297,6 @@ static void smc_buf_unuse(struct smc_connection *conn)
conn->rmb_desc->used = 0;
} else {
/* buf registration failed, reuse not possible */
- struct smc_link_group *lgr = conn->lgr;
-
write_lock_bh(&lgr->rmbs_lock);
list_del(&conn->rmb_desc->list);
write_unlock_bh(&lgr->rmbs_lock);
@@ -315,16 +309,21 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- if (!conn->lgr)
+ struct smc_link_group *lgr = conn->lgr;
+
+ if (!lgr)
return;
- if (conn->lgr->is_smcd) {
+ if (lgr->is_smcd) {
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn);
- smc_buf_unuse(conn);
+ smc_lgr_unregister_conn(conn); /* unsets conn->lgr */
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+
+ if (!lgr->conns_num)
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_link_clear(struct smc_link *lnk)
diff --git a/net/socket.c b/net/socket.c
index b68801c7d0ab..99c96851469f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2342,7 +2342,7 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
*/
int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
- unsigned int flags, struct timespec *timeout)
+ unsigned int flags, struct timespec64 *timeout)
{
int fput_needed, err, datagrams;
struct socket *sock;
@@ -2407,8 +2407,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
if (timeout) {
ktime_get_ts64(&timeout64);
- *timeout = timespec64_to_timespec(
- timespec64_sub(end_time, timeout64));
+ *timeout = timespec64_sub(end_time, timeout64);
if (timeout->tv_sec < 0) {
timeout->tv_sec = timeout->tv_nsec = 0;
break;
@@ -2454,10 +2453,10 @@ out_put:
static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags,
- struct timespec __user *timeout)
+ struct __kernel_timespec __user *timeout)
{
int datagrams;
- struct timespec timeout_sys;
+ struct timespec64 timeout_sys;
if (flags & MSG_CMSG_COMPAT)
return -EINVAL;
@@ -2465,13 +2464,12 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
if (!timeout)
return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
- if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
+ if (get_timespec64(&timeout_sys, timeout))
return -EFAULT;
datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
- if (datagrams > 0 &&
- copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
+ if (datagrams > 0 && put_timespec64(&timeout_sys, timeout))
datagrams = -EFAULT;
return datagrams;
@@ -2479,7 +2477,7 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
unsigned int, vlen, unsigned int, flags,
- struct timespec __user *, timeout)
+ struct __kernel_timespec __user *, timeout)
{
return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
}
@@ -2603,7 +2601,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
break;
case SYS_RECVMMSG:
err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
- a[3], (struct timespec __user *)a[4]);
+ a[3], (struct __kernel_timespec __user *)a[4]);
break;
case SYS_ACCEPT4:
err = __sys_accept4(a0, (struct sockaddr __user *)a1,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 305ecea92170..ad8ead738981 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -30,10 +30,9 @@ struct rpc_cred_cache {
static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
-static DEFINE_SPINLOCK(rpc_authflavor_lock);
-static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
- &authnull_ops, /* AUTH_NULL */
- &authunix_ops, /* AUTH_UNIX */
+static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+ [RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
+ [RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
NULL, /* others can be loadable modules */
};
@@ -93,39 +92,65 @@ pseudoflavor_to_flavor(u32 flavor) {
int
rpcauth_register(const struct rpc_authops *ops)
{
+ const struct rpc_authops *old;
rpc_authflavor_t flavor;
- int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- spin_lock(&rpc_authflavor_lock);
- if (auth_flavors[flavor] == NULL) {
- auth_flavors[flavor] = ops;
- ret = 0;
- }
- spin_unlock(&rpc_authflavor_lock);
- return ret;
+ old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], NULL, ops);
+ if (old == NULL || old == ops)
+ return 0;
+ return -EPERM;
}
EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
{
+ const struct rpc_authops *old;
rpc_authflavor_t flavor;
- int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- spin_lock(&rpc_authflavor_lock);
- if (auth_flavors[flavor] == ops) {
- auth_flavors[flavor] = NULL;
- ret = 0;
- }
- spin_unlock(&rpc_authflavor_lock);
- return ret;
+
+ old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], ops, NULL);
+ if (old == ops || old == NULL)
+ return 0;
+ return -EPERM;
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
+static const struct rpc_authops *
+rpcauth_get_authops(rpc_authflavor_t flavor)
+{
+ const struct rpc_authops *ops;
+
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(auth_flavors[flavor]);
+ if (ops == NULL) {
+ rcu_read_unlock();
+ request_module("rpc-auth-%u", flavor);
+ rcu_read_lock();
+ ops = rcu_dereference(auth_flavors[flavor]);
+ if (ops == NULL)
+ goto out;
+ }
+ if (!try_module_get(ops->owner))
+ ops = NULL;
+out:
+ rcu_read_unlock();
+ return ops;
+}
+
+static void
+rpcauth_put_authops(const struct rpc_authops *ops)
+{
+ module_put(ops->owner);
+}
+
/**
* rpcauth_get_pseudoflavor - check if security flavor is supported
* @flavor: a security flavor
@@ -138,25 +163,16 @@ EXPORT_SYMBOL_GPL(rpcauth_unregister);
rpc_authflavor_t
rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
{
- const struct rpc_authops *ops;
+ const struct rpc_authops *ops = rpcauth_get_authops(flavor);
rpc_authflavor_t pseudoflavor;
- ops = auth_flavors[flavor];
- if (ops == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
+ if (!ops)
return RPC_AUTH_MAXFLAVOR;
- }
- spin_unlock(&rpc_authflavor_lock);
-
pseudoflavor = flavor;
if (ops->info2flavor != NULL)
pseudoflavor = ops->info2flavor(info);
- module_put(ops->owner);
+ rpcauth_put_authops(ops);
return pseudoflavor;
}
EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
@@ -176,25 +192,15 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
const struct rpc_authops *ops;
int result;
- if (flavor >= RPC_AUTH_MAXFLAVOR)
- return -EINVAL;
-
- ops = auth_flavors[flavor];
+ ops = rpcauth_get_authops(flavor);
if (ops == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
return -ENOENT;
- }
- spin_unlock(&rpc_authflavor_lock);
result = -ENOENT;
if (ops->flavor2info != NULL)
result = ops->flavor2info(pseudoflavor, info);
- module_put(ops->owner);
+ rpcauth_put_authops(ops);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
@@ -212,15 +218,13 @@ EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
int
rpcauth_list_flavors(rpc_authflavor_t *array, int size)
{
- rpc_authflavor_t flavor;
- int result = 0;
+ const struct rpc_authops *ops;
+ rpc_authflavor_t flavor, pseudos[4];
+ int i, len, result = 0;
- spin_lock(&rpc_authflavor_lock);
+ rcu_read_lock();
for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
- const struct rpc_authops *ops = auth_flavors[flavor];
- rpc_authflavor_t pseudos[4];
- int i, len;
-
+ ops = rcu_dereference(auth_flavors[flavor]);
if (result >= size) {
result = -ENOMEM;
break;
@@ -245,7 +249,7 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
array[result++] = pseudos[i];
}
}
- spin_unlock(&rpc_authflavor_lock);
+ rcu_read_unlock();
dprintk("RPC: %s returns %d\n", __func__, result);
return result;
@@ -255,25 +259,17 @@ EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
struct rpc_auth *
rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- struct rpc_auth *auth;
+ struct rpc_auth *auth = ERR_PTR(-EINVAL);
const struct rpc_authops *ops;
- u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
+ u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
- auth = ERR_PTR(-EINVAL);
- if (flavor >= RPC_AUTH_MAXFLAVOR)
+ ops = rpcauth_get_authops(flavor);
+ if (ops == NULL)
goto out;
- if ((ops = auth_flavors[flavor]) == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
- goto out;
- }
- spin_unlock(&rpc_authflavor_lock);
auth = ops->create(args, clnt);
- module_put(ops->owner);
+
+ rpcauth_put_authops(ops);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
@@ -288,32 +284,37 @@ EXPORT_SYMBOL_GPL(rpcauth_create);
void
rpcauth_release(struct rpc_auth *auth)
{
- if (!atomic_dec_and_test(&auth->au_count))
+ if (!refcount_dec_and_test(&auth->au_count))
return;
auth->au_ops->destroy(auth);
}
static DEFINE_SPINLOCK(rpc_credcache_lock);
-static void
+/*
+ * On success, the caller is responsible for freeing the reference
+ * held by the hashtable
+ */
+static bool
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
+ if (!test_and_clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ return false;
hlist_del_rcu(&cred->cr_hash);
- smp_mb__before_atomic();
- clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+ return true;
}
-static int
+static bool
rpcauth_unhash_cred(struct rpc_cred *cred)
{
spinlock_t *cache_lock;
- int ret;
+ bool ret;
+ if (!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ return false;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
- ret = atomic_read(&cred->cr_count) == 0;
- if (ret)
- rpcauth_unhash_cred_locked(cred);
+ ret = rpcauth_unhash_cred_locked(cred);
spin_unlock(cache_lock);
return ret;
}
@@ -392,6 +393,44 @@ void rpcauth_destroy_credlist(struct list_head *head)
}
}
+static void
+rpcauth_lru_add_locked(struct rpc_cred *cred)
+{
+ if (!list_empty(&cred->cr_lru))
+ return;
+ number_cred_unused++;
+ list_add_tail(&cred->cr_lru, &cred_unused);
+}
+
+static void
+rpcauth_lru_add(struct rpc_cred *cred)
+{
+ if (!list_empty(&cred->cr_lru))
+ return;
+ spin_lock(&rpc_credcache_lock);
+ rpcauth_lru_add_locked(cred);
+ spin_unlock(&rpc_credcache_lock);
+}
+
+static void
+rpcauth_lru_remove_locked(struct rpc_cred *cred)
+{
+ if (list_empty(&cred->cr_lru))
+ return;
+ number_cred_unused--;
+ list_del_init(&cred->cr_lru);
+}
+
+static void
+rpcauth_lru_remove(struct rpc_cred *cred)
+{
+ if (list_empty(&cred->cr_lru))
+ return;
+ spin_lock(&rpc_credcache_lock);
+ rpcauth_lru_remove_locked(cred);
+ spin_unlock(&rpc_credcache_lock);
+}
+
/*
* Clear the RPC credential cache, and delete those credentials
* that are not referenced.
@@ -411,13 +450,10 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache)
head = &cache->hashtable[i];
while (!hlist_empty(head)) {
cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
- get_rpccred(cred);
- if (!list_empty(&cred->cr_lru)) {
- list_del(&cred->cr_lru);
- number_cred_unused--;
- }
- list_add_tail(&cred->cr_lru, &free);
rpcauth_unhash_cred_locked(cred);
+ /* Note: We now hold a reference to cred */
+ rpcauth_lru_remove_locked(cred);
+ list_add_tail(&cred->cr_lru, &free);
}
}
spin_unlock(&cache->lock);
@@ -451,7 +487,6 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
- spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
long freed = 0;
@@ -460,32 +495,24 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
if (nr_to_scan-- == 0)
break;
+ if (refcount_read(&cred->cr_count) > 1) {
+ rpcauth_lru_remove_locked(cred);
+ continue;
+ }
/*
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
- if (time_in_range(cred->cr_expire, expired, jiffies) &&
- test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
- freed = SHRINK_STOP;
- break;
- }
-
- list_del_init(&cred->cr_lru);
- number_cred_unused--;
- freed++;
- if (atomic_read(&cred->cr_count) != 0)
+ if (!time_in_range(cred->cr_expire, expired, jiffies))
+ continue;
+ if (!rpcauth_unhash_cred(cred))
continue;
- cache_lock = &cred->cr_auth->au_credcache->lock;
- spin_lock(cache_lock);
- if (atomic_read(&cred->cr_count) == 0) {
- get_rpccred(cred);
- list_add_tail(&cred->cr_lru, free);
- rpcauth_unhash_cred_locked(cred);
- }
- spin_unlock(cache_lock);
+ rpcauth_lru_remove_locked(cred);
+ freed++;
+ list_add_tail(&cred->cr_lru, free);
}
- return freed;
+ return freed ? freed : SHRINK_STOP;
}
static unsigned long
@@ -561,19 +588,15 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
if (flags & RPCAUTH_LOOKUP_RCU) {
- if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
- !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
- cred = entry;
+ if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
+ refcount_read(&entry->cr_count) == 0)
+ continue;
+ cred = entry;
break;
}
- spin_lock(&cache->lock);
- if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
- spin_unlock(&cache->lock);
- continue;
- }
cred = get_rpccred(entry);
- spin_unlock(&cache->lock);
- break;
+ if (cred)
+ break;
}
rcu_read_unlock();
@@ -594,11 +617,13 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
cred = get_rpccred(entry);
- break;
+ if (cred)
+ break;
}
if (cred == NULL) {
cred = new;
set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+ refcount_inc(&cred->cr_count);
hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
} else
list_add_tail(&new->cr_lru, &free);
@@ -645,7 +670,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
{
INIT_HLIST_NODE(&cred->cr_hash);
INIT_LIST_HEAD(&cred->cr_lru);
- atomic_set(&cred->cr_count, 1);
+ refcount_set(&cred->cr_count, 1);
cred->cr_auth = auth;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
@@ -713,36 +738,29 @@ put_rpccred(struct rpc_cred *cred)
{
if (cred == NULL)
return;
- /* Fast path for unhashed credentials */
- if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
- if (atomic_dec_and_test(&cred->cr_count))
- cred->cr_ops->crdestroy(cred);
- return;
- }
-
- if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
- return;
- if (!list_empty(&cred->cr_lru)) {
- number_cred_unused--;
- list_del_init(&cred->cr_lru);
- }
- if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
- cred->cr_expire = jiffies;
- list_add_tail(&cred->cr_lru, &cred_unused);
- number_cred_unused++;
- goto out_nodestroy;
- }
- if (!rpcauth_unhash_cred(cred)) {
- /* We were hashed and someone looked us up... */
- goto out_nodestroy;
- }
+ rcu_read_lock();
+ if (refcount_dec_and_test(&cred->cr_count))
+ goto destroy;
+ if (refcount_read(&cred->cr_count) != 1 ||
+ !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ goto out;
+ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
+ cred->cr_expire = jiffies;
+ rpcauth_lru_add(cred);
+ /* Race breaker */
+ if (unlikely(!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags)))
+ rpcauth_lru_remove(cred);
+ } else if (rpcauth_unhash_cred(cred)) {
+ rpcauth_lru_remove(cred);
+ if (refcount_dec_and_test(&cred->cr_count))
+ goto destroy;
}
- spin_unlock(&rpc_credcache_lock);
- cred->cr_ops->crdestroy(cred);
+out:
+ rcu_read_unlock();
return;
-out_nodestroy:
- spin_unlock(&rpc_credcache_lock);
+destroy:
+ rcu_read_unlock();
+ cred->cr_ops->crdestroy(cred);
}
EXPORT_SYMBOL_GPL(put_rpccred);
@@ -817,6 +835,16 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
}
+bool
+rpcauth_xmit_need_reencode(struct rpc_task *task)
+{
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!cred || !cred->cr_ops->crneed_reencode)
+ return false;
+ return cred->cr_ops->crneed_reencode(task);
+}
+
int
rpcauth_refreshcred(struct rpc_task *task)
{
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index f1df9837f1ac..d8831b988b1e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -274,7 +274,7 @@ static const struct rpc_authops generic_auth_ops = {
static struct rpc_auth generic_auth = {
.au_ops = &generic_auth_ops,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static bool generic_key_to_expire(struct rpc_cred *cred)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 21c0aa0a0d1d..30f970cdc7f6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1058,7 +1058,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
auth->au_flavor = flavor;
if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
- atomic_set(&auth->au_count, 1);
+ refcount_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
err = rpcauth_init_credcache(auth);
@@ -1187,7 +1187,7 @@ gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
if (strcmp(gss_auth->target_name, args->target_name))
continue;
}
- if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+ if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
continue;
goto out;
}
@@ -1984,6 +1984,46 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
return decode(rqstp, &xdr, obj);
}
+static bool
+gss_seq_is_newer(u32 new, u32 old)
+{
+ return (s32)(new - old) > 0;
+}
+
+static bool
+gss_xmit_need_reencode(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_cred *cred = req->rq_cred;
+ struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+ u32 win, seq_xmit;
+ bool ret = true;
+
+ if (!ctx)
+ return true;
+
+ if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+ goto out;
+
+ seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
+ while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+ u32 tmp = seq_xmit;
+
+ seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+ if (seq_xmit == tmp) {
+ ret = false;
+ goto out;
+ }
+ }
+
+ win = ctx->gc_win;
+ if (win > 0)
+ ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+out:
+ gss_put_ctx(ctx);
+ return ret;
+}
+
static int
gss_unwrap_resp(struct rpc_task *task,
kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
@@ -2052,6 +2092,7 @@ static const struct rpc_credops gss_credops = {
.crunwrap_resp = gss_unwrap_resp,
.crkey_timeout = gss_key_timeout,
.crstringify_acceptor = gss_stringify_acceptor,
+ .crneed_reencode = gss_xmit_need_reencode,
};
static const struct rpc_credops gss_nullops = {
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0220e1ca5280..4f43383971ba 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -53,7 +53,7 @@
u32
krb5_encrypt(
- struct crypto_skcipher *tfm,
+ struct crypto_sync_skcipher *tfm,
void * iv,
void * in,
void * out,
@@ -62,24 +62,24 @@ krb5_encrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
- if (length % crypto_skcipher_blocksize(tfm) != 0)
+ if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
- crypto_skcipher_ivsize(tfm));
+ crypto_sync_skcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
+ memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, length, local_iv);
@@ -92,7 +92,7 @@ out:
u32
krb5_decrypt(
- struct crypto_skcipher *tfm,
+ struct crypto_sync_skcipher *tfm,
void * iv,
void * in,
void * out,
@@ -101,23 +101,23 @@ krb5_decrypt(
u32 ret = -EINVAL;
struct scatterlist sg[1];
u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
- if (length % crypto_skcipher_blocksize(tfm) != 0)
+ if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
goto out;
- if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+ if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
- crypto_skcipher_ivsize(tfm));
+ crypto_sync_skcipher_ivsize(tfm));
goto out;
}
if (iv)
- memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
+ memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
memcpy(out, in, length);
sg_init_one(sg, out, length);
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, length, local_iv);
@@ -466,7 +466,8 @@ encryptor(struct scatterlist *sg, void *data)
{
struct encryptor_desc *desc = data;
struct xdr_buf *outbuf = desc->outbuf;
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+ struct crypto_sync_skcipher *tfm =
+ crypto_sync_skcipher_reqtfm(desc->req);
struct page *in_page;
int thislen = desc->fraglen + sg->length;
int fraglen, ret;
@@ -492,7 +493,7 @@ encryptor(struct scatterlist *sg, void *data)
desc->fraglen += sg->length;
desc->pos += sg->length;
- fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+ fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
@@ -526,16 +527,16 @@ encryptor(struct scatterlist *sg, void *data)
}
int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
int offset, struct page **pages)
{
int ret;
struct encryptor_desc desc;
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
- BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+ BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
memset(desc.iv, 0, sizeof(desc.iv));
@@ -567,7 +568,8 @@ decryptor(struct scatterlist *sg, void *data)
{
struct decryptor_desc *desc = data;
int thislen = desc->fraglen + sg->length;
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+ struct crypto_sync_skcipher *tfm =
+ crypto_sync_skcipher_reqtfm(desc->req);
int fraglen, ret;
/* Worst case is 4 fragments: head, end of page 1, start
@@ -578,7 +580,7 @@ decryptor(struct scatterlist *sg, void *data)
desc->fragno++;
desc->fraglen += sg->length;
- fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+ fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
thislen -= fraglen;
if (thislen == 0)
@@ -608,17 +610,17 @@ decryptor(struct scatterlist *sg, void *data)
}
int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
int offset)
{
int ret;
struct decryptor_desc desc;
- SKCIPHER_REQUEST_ON_STACK(req, tfm);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
/* XXXJBF: */
- BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+ BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
- skcipher_request_set_tfm(req, tfm);
+ skcipher_request_set_sync_tfm(req, tfm);
skcipher_request_set_callback(req, 0, NULL, NULL);
memset(desc.iv, 0, sizeof(desc.iv));
@@ -672,12 +674,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
}
static u32
-gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
+gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
u32 offset, u8 *iv, struct page **pages, int encrypt)
{
u32 ret;
struct scatterlist sg[1];
- SKCIPHER_REQUEST_ON_STACK(req, cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, cipher);
u8 *data;
struct page **save_pages;
u32 len = buf->len - offset;
@@ -706,7 +708,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
sg_init_one(sg, data, len);
- skcipher_request_set_tfm(req, cipher);
+ skcipher_request_set_sync_tfm(req, cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
skcipher_request_set_crypt(req, sg, sg, len, iv);
@@ -735,7 +737,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_netobj hmac;
u8 *cksumkey;
u8 *ecptr;
- struct crypto_skcipher *cipher, *aux_cipher;
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
int blocksize;
struct page **save_pages;
int nblocks, nbytes;
@@ -754,7 +756,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
cksumkey = kctx->acceptor_integ;
usage = KG_USAGE_ACCEPTOR_SEAL;
}
- blocksize = crypto_skcipher_blocksize(cipher);
+ blocksize = crypto_sync_skcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
@@ -807,7 +809,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
- SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
desc.fragno = 0;
@@ -816,7 +818,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
desc.outbuf = buf;
desc.req = req;
- skcipher_request_set_tfm(req, aux_cipher);
+ skcipher_request_set_sync_tfm(req, aux_cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
sg_init_table(desc.infrags, 4);
@@ -855,7 +857,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
struct xdr_buf subbuf;
u32 ret = 0;
u8 *cksum_key;
- struct crypto_skcipher *cipher, *aux_cipher;
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -874,7 +876,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
cksum_key = kctx->initiator_integ;
usage = KG_USAGE_INITIATOR_SEAL;
}
- blocksize = crypto_skcipher_blocksize(cipher);
+ blocksize = crypto_sync_skcipher_blocksize(cipher);
/* create a segment skipping the header and leaving out the checksum */
@@ -891,13 +893,13 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
memset(desc.iv, 0, sizeof(desc.iv));
if (cbcbytes) {
- SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
desc.fragno = 0;
desc.fraglen = 0;
desc.req = req;
- skcipher_request_set_tfm(req, aux_cipher);
+ skcipher_request_set_sync_tfm(req, aux_cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
sg_init_table(desc.frags, 4);
@@ -946,7 +948,8 @@ out_err:
* Set the key of the given cipher.
*/
int
-krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
+ struct crypto_sync_skcipher *cipher,
unsigned char *cksum)
{
struct crypto_shash *hmac;
@@ -994,7 +997,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
if (err)
goto out_err;
- err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+ err = crypto_sync_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
if (err)
goto out_err;
@@ -1012,7 +1015,8 @@ out_err:
* Set the key of cipher kctx->enc.
*/
int
-krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
+ struct crypto_sync_skcipher *cipher,
s32 seqnum)
{
struct crypto_shash *hmac;
@@ -1069,7 +1073,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
if (err)
goto out_err;
- err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+ err = crypto_sync_skcipher_setkey(cipher, Kcrypt,
+ kctx->gk5e->keylength);
if (err)
goto out_err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index f7fe2d2b851f..550fdf18d3b3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
size_t blocksize, keybytes, keylength, n;
unsigned char *inblockdata, *outblockdata, *rawkey;
struct xdr_netobj inblock, outblock;
- struct crypto_skcipher *cipher;
+ struct crypto_sync_skcipher *cipher;
u32 ret = EINVAL;
blocksize = gk5e->blocksize;
@@ -157,11 +157,10 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
if ((inkey->len != keylength) || (outkey->len != keylength))
goto err_return;
- cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
if (IS_ERR(cipher))
goto err_return;
- if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
+ if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
goto err_return;
/* allocate and set up buffers */
@@ -238,7 +237,7 @@ err_free_in:
memset(inblockdata, 0, blocksize);
kfree(inblockdata);
err_free_cipher:
- crypto_free_skcipher(cipher);
+ crypto_free_sync_skcipher(cipher);
err_return:
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9..7f0424dfa8f6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -218,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
static inline const void *
get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_skcipher **res)
+ struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
{
struct xdr_netobj key;
int alg;
@@ -246,15 +246,14 @@ get_key(const void *p, const void *end,
if (IS_ERR(p))
goto out_err;
- *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ *res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
if (IS_ERR(*res)) {
printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
*res = NULL;
goto out_err_free_key;
}
- if (crypto_skcipher_setkey(*res, key.data, key.len)) {
+ if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
goto out_err_free_tfm;
@@ -264,7 +263,7 @@ get_key(const void *p, const void *end,
return p;
out_err_free_tfm:
- crypto_free_skcipher(*res);
+ crypto_free_sync_skcipher(*res);
out_err_free_key:
kfree(key.data);
p = ERR_PTR(-EINVAL);
@@ -336,30 +335,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
return 0;
out_err_free_key2:
- crypto_free_skcipher(ctx->seq);
+ crypto_free_sync_skcipher(ctx->seq);
out_err_free_key1:
- crypto_free_skcipher(ctx->enc);
+ crypto_free_sync_skcipher(ctx->enc);
out_err_free_mech:
kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
-static struct crypto_skcipher *
+static struct crypto_sync_skcipher *
context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
{
- struct crypto_skcipher *cp;
+ struct crypto_sync_skcipher *cp;
- cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
+ cp = crypto_alloc_sync_skcipher(cname, 0, 0);
if (IS_ERR(cp)) {
dprintk("gss_kerberos_mech: unable to initialize "
"crypto algorithm %s\n", cname);
return NULL;
}
- if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+ if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
dprintk("gss_kerberos_mech: error setting key for "
"crypto algorithm %s\n", cname);
- crypto_free_skcipher(cp);
+ crypto_free_sync_skcipher(cp);
return NULL;
}
return cp;
@@ -413,9 +412,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
return 0;
out_free_enc:
- crypto_free_skcipher(ctx->enc);
+ crypto_free_sync_skcipher(ctx->enc);
out_free_seq:
- crypto_free_skcipher(ctx->seq);
+ crypto_free_sync_skcipher(ctx->seq);
out_err:
return -EINVAL;
}
@@ -469,17 +468,15 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
/*
* allocate hash, and skciphers for data and seqnum encryption
*/
- ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ ctx->enc = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
if (IS_ERR(ctx->enc)) {
err = PTR_ERR(ctx->enc);
goto out_err_free_hmac;
}
- ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ ctx->seq = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
if (IS_ERR(ctx->seq)) {
- crypto_free_skcipher(ctx->enc);
+ crypto_free_sync_skcipher(ctx->enc);
err = PTR_ERR(ctx->seq);
goto out_err_free_hmac;
}
@@ -591,7 +588,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
context_v2_alloc_cipher(ctx, "cbc(aes)",
ctx->acceptor_seal);
if (ctx->acceptor_enc_aux == NULL) {
- crypto_free_skcipher(ctx->initiator_enc_aux);
+ crypto_free_sync_skcipher(ctx->initiator_enc_aux);
goto out_free_acceptor_enc;
}
}
@@ -599,9 +596,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
return 0;
out_free_acceptor_enc:
- crypto_free_skcipher(ctx->acceptor_enc);
+ crypto_free_sync_skcipher(ctx->acceptor_enc);
out_free_initiator_enc:
- crypto_free_skcipher(ctx->initiator_enc);
+ crypto_free_sync_skcipher(ctx->initiator_enc);
out_err:
return -EINVAL;
}
@@ -713,12 +710,12 @@ static void
gss_delete_sec_context_kerberos(void *internal_ctx) {
struct krb5_ctx *kctx = internal_ctx;
- crypto_free_skcipher(kctx->seq);
- crypto_free_skcipher(kctx->enc);
- crypto_free_skcipher(kctx->acceptor_enc);
- crypto_free_skcipher(kctx->initiator_enc);
- crypto_free_skcipher(kctx->acceptor_enc_aux);
- crypto_free_skcipher(kctx->initiator_enc_aux);
+ crypto_free_sync_skcipher(kctx->seq);
+ crypto_free_sync_skcipher(kctx->enc);
+ crypto_free_sync_skcipher(kctx->acceptor_enc);
+ crypto_free_sync_skcipher(kctx->initiator_enc);
+ crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
+ crypto_free_sync_skcipher(kctx->initiator_enc_aux);
kfree(kctx->mech_used.data);
kfree(kctx);
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index eaad9bc7a0bd..b4adeb06660b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -63,13 +63,12 @@
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/crypto.h>
+#include <linux/atomic.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-DEFINE_SPINLOCK(krb5_seq_lock);
-
static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
@@ -124,6 +123,30 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
return krb5_hdr;
}
+u32
+gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
+{
+ u32 old, seq_send = READ_ONCE(ctx->seq_send);
+
+ do {
+ old = seq_send;
+ seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
+ } while (old != seq_send);
+ return seq_send;
+}
+
+u64
+gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
+{
+ u64 old, seq_send = READ_ONCE(ctx->seq_send);
+
+ do {
+ old = seq_send;
+ seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
+ } while (old != seq_send);
+ return seq_send;
+}
+
static u32
gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
@@ -154,9 +177,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- spin_lock(&krb5_seq_lock);
- seq_send = ctx->seq_send++;
- spin_unlock(&krb5_seq_lock);
+ seq_send = gss_seq_send_fetch_and_inc(ctx);
if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -174,7 +195,6 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
.data = cksumdata};
void *krb5_hdr;
s32 now;
- u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
__be64 seq_send_be64;
@@ -185,11 +205,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
/* Set up the sequence number. Now 64-bits in clear
* text and w/o direction indicator */
- spin_lock(&krb5_seq_lock);
- seq_send = ctx->seq_send64++;
- spin_unlock(&krb5_seq_lock);
-
- seq_send_be64 = cpu_to_be64(seq_send);
+ seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c8b9082f4a9d..fb6656295204 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -43,13 +43,12 @@ static s32
krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
unsigned char *cksum, unsigned char *buf)
{
- struct crypto_skcipher *cipher;
+ struct crypto_sync_skcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
- cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
@@ -68,12 +67,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
code = krb5_encrypt(cipher, cksum, plain, buf, 8);
out:
- crypto_free_skcipher(cipher);
+ crypto_free_sync_skcipher(cipher);
return code;
}
s32
krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_skcipher *key,
+ struct crypto_sync_skcipher *key,
int direction,
u32 seqnum,
unsigned char *cksum, unsigned char *buf)
@@ -101,13 +100,12 @@ static s32
krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
unsigned char *buf, int *direction, s32 *seqnum)
{
- struct crypto_skcipher *cipher;
+ struct crypto_sync_skcipher *cipher;
unsigned char plain[8];
s32 code;
dprintk("RPC: %s:\n", __func__);
- cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
if (IS_ERR(cipher))
return PTR_ERR(cipher);
@@ -130,7 +128,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
(plain[2] << 8) | (plain[3]));
out:
- crypto_free_skcipher(cipher);
+ crypto_free_sync_skcipher(cipher);
return code;
}
@@ -142,7 +140,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
{
s32 code;
unsigned char plain[8];
- struct crypto_skcipher *key = kctx->seq;
+ struct crypto_sync_skcipher *key = kctx->seq;
dprintk("RPC: krb5_get_seq_num:\n");
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 39a2e672900b..962fa84e6db1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
now = get_seconds();
- blocksize = crypto_skcipher_blocksize(kctx->enc);
+ blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
gss_krb5_add_padding(buf, offset, blocksize);
BUG_ON((buf->len - offset) % blocksize);
plainlen = conflen + buf->len - offset;
@@ -228,9 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- spin_lock(&krb5_seq_lock);
- seq_send = kctx->seq_send++;
- spin_unlock(&krb5_seq_lock);
+ seq_send = gss_seq_send_fetch_and_inc(kctx);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
@@ -239,10 +237,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
return GSS_S_FAILURE;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
- struct crypto_skcipher *cipher;
+ struct crypto_sync_skcipher *cipher;
int err;
- cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+ 0, 0);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
@@ -250,7 +248,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
err = gss_encrypt_xdr_buf(cipher, buf,
offset + headlen - conflen, pages);
- crypto_free_skcipher(cipher);
+ crypto_free_sync_skcipher(cipher);
if (err)
return GSS_S_FAILURE;
} else {
@@ -327,18 +325,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
return GSS_S_BAD_SIG;
if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
- struct crypto_skcipher *cipher;
+ struct crypto_sync_skcipher *cipher;
int err;
- cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
- CRYPTO_ALG_ASYNC);
+ cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+ 0, 0);
if (IS_ERR(cipher))
return GSS_S_FAILURE;
krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
- crypto_free_skcipher(cipher);
+ crypto_free_sync_skcipher(cipher);
if (err)
return GSS_S_DEFECTIVE_TOKEN;
} else {
@@ -371,7 +369,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
/* Copy the data back to the right position. XXX: Would probably be
* better to copy and encrypt at the same time. */
- blocksize = crypto_skcipher_blocksize(kctx->enc);
+ blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
conflen;
orig_start = buf->head[0].iov_base + offset;
@@ -477,9 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
*be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
- spin_lock(&krb5_seq_lock);
- *be64ptr = cpu_to_be64(kctx->seq_send64++);
- spin_unlock(&krb5_seq_lock);
+ *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 5fec3abbe19b..16ac0f4cb7d8 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -117,7 +117,7 @@ int gss_mech_register(struct gss_api_mech *gm)
if (status)
return status;
spin_lock(&registered_mechs_lock);
- list_add(&gm->gm_list, &registered_mechs);
+ list_add_rcu(&gm->gm_list, &registered_mechs);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
return 0;
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(gss_mech_register);
void gss_mech_unregister(struct gss_api_mech *gm)
{
spin_lock(&registered_mechs_lock);
- list_del(&gm->gm_list);
+ list_del_rcu(&gm->gm_list);
spin_unlock(&registered_mechs_lock);
dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
gss_mech_free(gm);
@@ -151,15 +151,15 @@ _gss_mech_get_by_name(const char *name)
{
struct gss_api_mech *pos, *gm = NULL;
- spin_lock(&registered_mechs_lock);
- list_for_each_entry(pos, &registered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
if (0 == strcmp(name, pos->gm_name)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
- spin_unlock(&registered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
@@ -186,8 +186,8 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
dprintk("RPC: %s(%s)\n", __func__, buf);
request_module("rpc-auth-gss-%s", buf);
- spin_lock(&registered_mechs_lock);
- list_for_each_entry(pos, &registered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
if (obj->len == pos->gm_oid.len) {
if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
if (try_module_get(pos->gm_owner))
@@ -196,7 +196,7 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
}
}
}
- spin_unlock(&registered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
@@ -216,15 +216,15 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
{
struct gss_api_mech *gm = NULL, *pos;
- spin_lock(&registered_mechs_lock);
- list_for_each_entry(pos, &registered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
- spin_unlock(&registered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
@@ -257,8 +257,8 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
struct gss_api_mech *pos = NULL;
int j, i = 0;
- spin_lock(&registered_mechs_lock);
- list_for_each_entry(pos, &registered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
for (j = 0; j < pos->gm_pf_num; j++) {
if (i >= size) {
spin_unlock(&registered_mechs_lock);
@@ -267,7 +267,7 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
- spin_unlock(&registered_mechs_lock);
+ rcu_read_unlock();
return i;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 444380f968f1..006062ad5f58 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -784,6 +784,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
xdr_inline_pages(&req->rq_rcv_buf,
PAGE_SIZE/2 /* pretty arbitrary */,
arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 860f2a1bbb67..1ece4bc3eb8d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -76,6 +76,7 @@ struct rsi {
struct xdr_netobj in_handle, in_token;
struct xdr_netobj out_handle, out_token;
int major_status, minor_status;
+ struct rcu_head rcu_head;
};
static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old);
@@ -89,13 +90,21 @@ static void rsi_free(struct rsi *rsii)
kfree(rsii->out_token.data);
}
-static void rsi_put(struct kref *ref)
+static void rsi_free_rcu(struct rcu_head *head)
{
- struct rsi *rsii = container_of(ref, struct rsi, h.ref);
+ struct rsi *rsii = container_of(head, struct rsi, rcu_head);
+
rsi_free(rsii);
kfree(rsii);
}
+static void rsi_put(struct kref *ref)
+{
+ struct rsi *rsii = container_of(ref, struct rsi, h.ref);
+
+ call_rcu(&rsii->rcu_head, rsi_free_rcu);
+}
+
static inline int rsi_hash(struct rsi *item)
{
return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS)
@@ -282,7 +291,7 @@ static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item)
struct cache_head *ch;
int hash = rsi_hash(item);
- ch = sunrpc_cache_lookup(cd, &item->h, hash);
+ ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash);
if (ch)
return container_of(ch, struct rsi, h);
else
@@ -330,6 +339,7 @@ struct rsc {
struct svc_cred cred;
struct gss_svc_seq_data seqdata;
struct gss_ctx *mechctx;
+ struct rcu_head rcu_head;
};
static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old);
@@ -343,12 +353,22 @@ static void rsc_free(struct rsc *rsci)
free_svc_cred(&rsci->cred);
}
+static void rsc_free_rcu(struct rcu_head *head)
+{
+ struct rsc *rsci = container_of(head, struct rsc, rcu_head);
+
+ kfree(rsci->handle.data);
+ kfree(rsci);
+}
+
static void rsc_put(struct kref *ref)
{
struct rsc *rsci = container_of(ref, struct rsc, h.ref);
- rsc_free(rsci);
- kfree(rsci);
+ if (rsci->mechctx)
+ gss_delete_sec_context(&rsci->mechctx);
+ free_svc_cred(&rsci->cred);
+ call_rcu(&rsci->rcu_head, rsc_free_rcu);
}
static inline int
@@ -542,7 +562,7 @@ static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item)
struct cache_head *ch;
int hash = rsc_hash(item);
- ch = sunrpc_cache_lookup(cd, &item->h, hash);
+ ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash);
if (ch)
return container_of(ch, struct rsc, h);
else
@@ -1764,14 +1784,21 @@ out_err:
}
static void
-svcauth_gss_domain_release(struct auth_domain *dom)
+svcauth_gss_domain_release_rcu(struct rcu_head *head)
{
+ struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
struct gss_domain *gd = container_of(dom, struct gss_domain, h);
kfree(dom->name);
kfree(gd);
}
+static void
+svcauth_gss_domain_release(struct auth_domain *dom)
+{
+ call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
+}
+
static struct auth_ops svcauthops_gss = {
.name = "rpcsec_gss",
.owner = THIS_MODULE,
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4b48228ee8c7..2694a1bc026b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -21,7 +21,7 @@ static struct rpc_cred null_cred;
static struct rpc_auth *
nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- atomic_inc(&null_auth.au_count);
+ refcount_inc(&null_auth.au_count);
return &null_auth;
}
@@ -119,7 +119,7 @@ struct rpc_auth null_auth = {
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static
@@ -138,6 +138,6 @@ struct rpc_cred null_cred = {
.cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
.cr_auth = &null_auth,
.cr_ops = &null_credops,
- .cr_count = ATOMIC_INIT(1),
+ .cr_count = REFCOUNT_INIT(2),
.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 185e56d4f9ae..4c1c7e56288f 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -34,7 +34,7 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
- atomic_inc(&unix_auth.au_count);
+ refcount_inc(&unix_auth.au_count);
return &unix_auth;
}
@@ -239,7 +239,7 @@ struct rpc_auth unix_auth = {
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3c15a99b9700..fa5ba6ed3197 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -91,7 +91,6 @@ struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
return NULL;
req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 109fbe591e7b..f96345b1180e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,28 +54,33 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
-struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
- struct cache_head *key, int hash)
+static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
+ struct cache_head *key,
+ int hash)
{
- struct cache_head *new = NULL, *freeme = NULL, *tmp = NULL;
- struct hlist_head *head;
-
- head = &detail->hash_table[hash];
-
- read_lock(&detail->hash_lock);
+ struct hlist_head *head = &detail->hash_table[hash];
+ struct cache_head *tmp;
- hlist_for_each_entry(tmp, head, cache_list) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp, head, cache_list) {
if (detail->match(tmp, key)) {
if (cache_is_expired(detail, tmp))
- /* This entry is expired, we will discard it. */
- break;
- cache_get(tmp);
- read_unlock(&detail->hash_lock);
+ continue;
+ tmp = cache_get_rcu(tmp);
+ rcu_read_unlock();
return tmp;
}
}
- read_unlock(&detail->hash_lock);
- /* Didn't find anything, insert an empty entry */
+ rcu_read_unlock();
+ return NULL;
+}
+
+static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
+ struct cache_head *key,
+ int hash)
+{
+ struct cache_head *new, *tmp, *freeme = NULL;
+ struct hlist_head *head = &detail->hash_table[hash];
new = detail->alloc();
if (!new)
@@ -87,35 +92,46 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
cache_init(new, detail);
detail->init(new, key);
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
/* check if entry appeared while we slept */
- hlist_for_each_entry(tmp, head, cache_list) {
+ hlist_for_each_entry_rcu(tmp, head, cache_list) {
if (detail->match(tmp, key)) {
if (cache_is_expired(detail, tmp)) {
- hlist_del_init(&tmp->cache_list);
+ hlist_del_init_rcu(&tmp->cache_list);
detail->entries --;
freeme = tmp;
break;
}
cache_get(tmp);
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
cache_put(new, detail);
return tmp;
}
}
- hlist_add_head(&new->cache_list, head);
+ hlist_add_head_rcu(&new->cache_list, head);
detail->entries++;
cache_get(new);
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
if (freeme)
cache_put(freeme, detail);
return new;
}
-EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
+struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
+ struct cache_head *key, int hash)
+{
+ struct cache_head *ret;
+
+ ret = sunrpc_cache_find_rcu(detail, key, hash);
+ if (ret)
+ return ret;
+ /* Didn't find anything, insert an empty entry */
+ return sunrpc_cache_add_entry(detail, key, hash);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
@@ -151,18 +167,18 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
struct cache_head *tmp;
if (!test_bit(CACHE_VALID, &old->flags)) {
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
if (!test_bit(CACHE_VALID, &old->flags)) {
if (test_bit(CACHE_NEGATIVE, &new->flags))
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
cache_fresh_locked(old, new->expiry_time, detail);
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail);
return old;
}
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
}
/* We need to insert a new entry */
tmp = detail->alloc();
@@ -173,7 +189,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_init(tmp, detail);
detail->init(tmp, old);
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
if (test_bit(CACHE_NEGATIVE, &new->flags))
set_bit(CACHE_NEGATIVE, &tmp->flags);
else
@@ -183,7 +199,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_get(tmp);
cache_fresh_locked(tmp, new->expiry_time, detail);
cache_fresh_locked(old, 0, detail);
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
cache_put(old, detail);
@@ -223,7 +239,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
{
int rv;
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
rv = cache_is_valid(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
@@ -231,7 +247,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
detail);
rv = -ENOENT;
}
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
cache_fresh_unlocked(h, detail);
return rv;
}
@@ -341,7 +357,7 @@ static struct delayed_work cache_cleaner;
void sunrpc_init_cache_detail(struct cache_detail *cd)
{
- rwlock_init(&cd->hash_lock);
+ spin_lock_init(&cd->hash_lock);
INIT_LIST_HEAD(&cd->queue);
spin_lock(&cache_list_lock);
cd->nextcheck = 0;
@@ -361,11 +377,11 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
{
cache_purge(cd);
spin_lock(&cache_list_lock);
- write_lock(&cd->hash_lock);
+ spin_lock(&cd->hash_lock);
if (current_detail == cd)
current_detail = NULL;
list_del_init(&cd->others);
- write_unlock(&cd->hash_lock);
+ spin_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
if (list_empty(&cache_list)) {
/* module must be being unloaded so its safe to kill the worker */
@@ -422,7 +438,7 @@ static int cache_clean(void)
struct hlist_head *head;
struct hlist_node *tmp;
- write_lock(&current_detail->hash_lock);
+ spin_lock(&current_detail->hash_lock);
/* Ok, now to clean this strand */
@@ -433,13 +449,13 @@ static int cache_clean(void)
if (!cache_is_expired(current_detail, ch))
continue;
- hlist_del_init(&ch->cache_list);
+ hlist_del_init_rcu(&ch->cache_list);
current_detail->entries--;
rv = 1;
break;
}
- write_unlock(&current_detail->hash_lock);
+ spin_unlock(&current_detail->hash_lock);
d = current_detail;
if (!ch)
current_index ++;
@@ -494,9 +510,9 @@ void cache_purge(struct cache_detail *detail)
struct hlist_node *tmp = NULL;
int i = 0;
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
if (!detail->entries) {
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
return;
}
@@ -504,17 +520,17 @@ void cache_purge(struct cache_detail *detail)
for (i = 0; i < detail->hash_size; i++) {
head = &detail->hash_table[i];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
- hlist_del_init(&ch->cache_list);
+ hlist_del_init_rcu(&ch->cache_list);
detail->entries--;
set_bit(CACHE_CLEANED, &ch->flags);
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
cache_fresh_unlocked(ch, detail);
cache_put(ch, detail);
- write_lock(&detail->hash_lock);
+ spin_lock(&detail->hash_lock);
}
}
- write_unlock(&detail->hash_lock);
+ spin_unlock(&detail->hash_lock);
}
EXPORT_SYMBOL_GPL(cache_purge);
@@ -1289,21 +1305,19 @@ EXPORT_SYMBOL_GPL(qword_get);
* get a header, then pass each real item in the cache
*/
-void *cache_seq_start(struct seq_file *m, loff_t *pos)
- __acquires(cd->hash_lock)
+static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
{
loff_t n = *pos;
unsigned int hash, entry;
struct cache_head *ch;
struct cache_detail *cd = m->private;
- read_lock(&cd->hash_lock);
if (!n--)
return SEQ_START_TOKEN;
hash = n >> 32;
entry = n & ((1LL<<32) - 1);
- hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list)
+ hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list)
if (!entry--)
return ch;
n &= ~((1LL<<32) - 1);
@@ -1315,12 +1329,12 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos)
if (hash >= cd->hash_size)
return NULL;
*pos = n+1;
- return hlist_entry_safe(cd->hash_table[hash].first,
+ return hlist_entry_safe(rcu_dereference_raw(
+ hlist_first_rcu(&cd->hash_table[hash])),
struct cache_head, cache_list);
}
-EXPORT_SYMBOL_GPL(cache_seq_start);
-void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
+static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
struct cache_head *ch = p;
int hash = (*pos >> 32);
@@ -1333,7 +1347,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
*pos += 1LL<<32;
} else {
++*pos;
- return hlist_entry_safe(ch->cache_list.next,
+ return hlist_entry_safe(rcu_dereference_raw(
+ hlist_next_rcu(&ch->cache_list)),
struct cache_head, cache_list);
}
*pos &= ~((1LL<<32) - 1);
@@ -1345,18 +1360,32 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
if (hash >= cd->hash_size)
return NULL;
++*pos;
- return hlist_entry_safe(cd->hash_table[hash].first,
+ return hlist_entry_safe(rcu_dereference_raw(
+ hlist_first_rcu(&cd->hash_table[hash])),
struct cache_head, cache_list);
}
EXPORT_SYMBOL_GPL(cache_seq_next);
-void cache_seq_stop(struct seq_file *m, void *p)
- __releases(cd->hash_lock)
+void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
+ __acquires(RCU)
{
- struct cache_detail *cd = m->private;
- read_unlock(&cd->hash_lock);
+ rcu_read_lock();
+ return __cache_seq_start(m, pos);
+}
+EXPORT_SYMBOL_GPL(cache_seq_start_rcu);
+
+void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos)
+{
+ return cache_seq_next(file, p, pos);
+}
+EXPORT_SYMBOL_GPL(cache_seq_next_rcu);
+
+void cache_seq_stop_rcu(struct seq_file *m, void *p)
+ __releases(RCU)
+{
+ rcu_read_unlock();
}
-EXPORT_SYMBOL_GPL(cache_seq_stop);
+EXPORT_SYMBOL_GPL(cache_seq_stop_rcu);
static int c_show(struct seq_file *m, void *p)
{
@@ -1384,9 +1413,9 @@ static int c_show(struct seq_file *m, void *p)
}
static const struct seq_operations cache_content_op = {
- .start = cache_seq_start,
- .next = cache_seq_next,
- .stop = cache_seq_stop,
+ .start = cache_seq_start_rcu,
+ .next = cache_seq_next_rcu,
+ .stop = cache_seq_stop_rcu,
.show = c_show,
};
@@ -1844,13 +1873,13 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
{
- write_lock(&cd->hash_lock);
+ spin_lock(&cd->hash_lock);
if (!hlist_unhashed(&h->cache_list)){
- hlist_del_init(&h->cache_list);
+ hlist_del_init_rcu(&h->cache_list);
cd->entries--;
- write_unlock(&cd->hash_lock);
+ spin_unlock(&cd->hash_lock);
cache_put(h, cd);
} else
- write_unlock(&cd->hash_lock);
+ spin_unlock(&cd->hash_lock);
}
EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ea2f5fadd96..ae3b8145da35 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -61,6 +61,7 @@ static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
+static void call_encode(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
@@ -1137,10 +1138,10 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
{
struct rpc_task *task;
- struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
.callback_ops = &rpc_default_ops,
- .flags = RPC_TASK_SOFTCONN,
+ .flags = RPC_TASK_SOFTCONN |
+ RPC_TASK_NO_RETRANS_TIMEOUT,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1148,14 +1149,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
- task->tk_rqstp = req;
-
- /*
- * Set up the xdr_buf length.
- * This also indicates that the buffer is XDR encoded already.
- */
- xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
- xbufp->tail[0].iov_len;
+ xprt_init_bc_request(req, task);
task->tk_action = call_bc_transmit;
atomic_inc(&task->tk_count);
@@ -1558,7 +1552,6 @@ call_reserveresult(struct rpc_task *task)
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
- xprt_request_init(task);
task->tk_action = call_refresh;
return;
}
@@ -1680,7 +1673,7 @@ call_allocate(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
if (req->rq_buffer)
return;
@@ -1721,22 +1714,15 @@ call_allocate(struct rpc_task *task)
rpc_exit(task, -ERESTARTSYS);
}
-static inline int
+static int
rpc_task_need_encode(struct rpc_task *task)
{
- return task->tk_rqstp->rq_snd_buf.len == 0;
+ return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
+ (!(task->tk_flags & RPC_TASK_SENT) ||
+ !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+ xprt_request_need_retransmit(task));
}
-static inline void
-rpc_task_force_reencode(struct rpc_task *task)
-{
- task->tk_rqstp->rq_snd_buf.len = 0;
- task->tk_rqstp->rq_bytes_sent = 0;
-}
-
-/*
- * 3. Encode arguments of an RPC call
- */
static void
rpc_xdr_encode(struct rpc_task *task)
{
@@ -1752,6 +1738,7 @@ rpc_xdr_encode(struct rpc_task *task)
xdr_buf_init(&req->rq_rcv_buf,
req->rq_rbuffer,
req->rq_rcvsize);
+ req->rq_bytes_sent = 0;
p = rpc_encode_header(task);
if (p == NULL) {
@@ -1766,6 +1753,36 @@ rpc_xdr_encode(struct rpc_task *task)
task->tk_status = rpcauth_wrap_req(task, encode, req, p,
task->tk_msg.rpc_argp);
+ if (task->tk_status == 0)
+ xprt_request_prepare(req);
+}
+
+/*
+ * 3. Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+ if (!rpc_task_need_encode(task))
+ goto out;
+ /* Encode here so that rpcsec_gss can use correct sequence number. */
+ rpc_xdr_encode(task);
+ /* Did the encode result in an error condition? */
+ if (task->tk_status != 0) {
+ /* Was the error nonfatal? */
+ if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM)
+ rpc_delay(task, HZ >> 4);
+ else
+ rpc_exit(task, task->tk_status);
+ return;
+ }
+
+ /* Add task to reply queue before transmission to avoid races */
+ if (rpc_reply_expected(task))
+ xprt_request_enqueue_receive(task);
+ xprt_request_enqueue_transmit(task);
+out:
+ task->tk_action = call_bind;
}
/*
@@ -1947,43 +1964,16 @@ call_connect_status(struct rpc_task *task)
static void
call_transmit(struct rpc_task *task)
{
- int is_retrans = RPC_WAS_SENT(task);
-
dprint_status(task);
- task->tk_action = call_status;
- if (task->tk_status < 0)
- return;
- if (!xprt_prepare_transmit(task))
- return;
- task->tk_action = call_transmit_status;
- /* Encode here so that rpcsec_gss can use correct sequence number. */
- if (rpc_task_need_encode(task)) {
- rpc_xdr_encode(task);
- /* Did the encode result in an error condition? */
- if (task->tk_status != 0) {
- /* Was the error nonfatal? */
- if (task->tk_status == -EAGAIN)
- rpc_delay(task, HZ >> 4);
- else
- rpc_exit(task, task->tk_status);
+ task->tk_status = 0;
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ if (!xprt_prepare_transmit(task))
return;
- }
+ xprt_transmit(task);
}
- xprt_transmit(task);
- if (task->tk_status < 0)
- return;
- if (is_retrans)
- task->tk_client->cl_stats->rpcretrans++;
- /*
- * On success, ensure that we call xprt_end_transmit() before sleeping
- * in order to allow access to the socket to other RPC requests.
- */
- call_transmit_status(task);
- if (rpc_reply_expected(task))
- return;
- task->tk_action = rpc_exit_task;
- rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
+ task->tk_action = call_transmit_status;
+ xprt_end_transmit(task);
}
/*
@@ -1999,19 +1989,17 @@ call_transmit_status(struct rpc_task *task)
* test first.
*/
if (task->tk_status == 0) {
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ xprt_request_wait_receive(task);
return;
}
switch (task->tk_status) {
- case -EAGAIN:
- case -ENOBUFS:
- break;
default:
dprint_status(task);
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ break;
+ case -EBADMSG:
+ task->tk_status = 0;
+ task->tk_action = call_encode;
break;
/*
* Special cases: if we've been waiting on the
@@ -2019,6 +2007,14 @@ call_transmit_status(struct rpc_task *task)
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
+ /* fall through */
+ case -EBADSLT:
+ case -EAGAIN:
+ task->tk_action = call_transmit;
+ task->tk_status = 0;
+ break;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
@@ -2026,7 +2022,6 @@ call_transmit_status(struct rpc_task *task)
case -ENETUNREACH:
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
- xprt_end_transmit(task);
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
@@ -2039,7 +2034,7 @@ call_transmit_status(struct rpc_task *task)
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
- rpc_task_force_reencode(task);
+ break;
}
}
@@ -2053,6 +2048,11 @@ call_bc_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
+ if (rpc_task_need_encode(task))
+ xprt_request_enqueue_transmit(task);
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ goto out_wakeup;
+
if (!xprt_prepare_transmit(task))
goto out_retry;
@@ -2061,14 +2061,9 @@ call_bc_transmit(struct rpc_task *task)
"error: %d\n", task->tk_status);
goto out_done;
}
- if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
- req->rq_bytes_sent = 0;
xprt_transmit(task);
- if (task->tk_status == -EAGAIN)
- goto out_nospace;
-
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
@@ -2084,6 +2079,8 @@ call_bc_transmit(struct rpc_task *task)
case -ENOTCONN:
case -EPIPE:
break;
+ case -EAGAIN:
+ goto out_retry;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
@@ -2107,12 +2104,11 @@ call_bc_transmit(struct rpc_task *task)
"error: %d\n", task->tk_status);
break;
}
+out_wakeup:
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
out_done:
task->tk_action = rpc_exit_task;
return;
-out_nospace:
- req->rq_connect_cookie = req->rq_xprt->connect_cookie;
out_retry:
task->tk_status = 0;
}
@@ -2125,15 +2121,11 @@ static void
call_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- struct rpc_rqst *req = task->tk_rqstp;
int status;
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt, task->tk_status);
- if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
- task->tk_status = req->rq_reply_bytes_recvd;
-
dprint_status(task);
status = task->tk_status;
@@ -2173,13 +2165,8 @@ call_status(struct rpc_task *task)
/* fall through */
case -EPIPE:
case -ENOTCONN:
- task->tk_action = call_bind;
- break;
- case -ENOBUFS:
- rpc_delay(task, HZ>>2);
- /* fall through */
case -EAGAIN:
- task->tk_action = call_transmit;
+ task->tk_action = call_encode;
break;
case -EIO:
/* shutdown or soft timeout */
@@ -2244,7 +2231,7 @@ call_timeout(struct rpc_task *task)
rpcauth_invalcred(task);
retry:
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
task->tk_status = 0;
}
@@ -2261,6 +2248,11 @@ call_decode(struct rpc_task *task)
dprint_status(task);
+ if (!decode) {
+ task->tk_action = rpc_exit_task;
+ return;
+ }
+
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s OK\n",
@@ -2283,7 +2275,7 @@ call_decode(struct rpc_task *task)
if (req->rq_rcv_buf.len < 12) {
if (!RPC_IS_SOFT(task)) {
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
goto out_retry;
}
dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
@@ -2298,13 +2290,11 @@ call_decode(struct rpc_task *task)
goto out_retry;
return;
}
-
task->tk_action = rpc_exit_task;
- if (decode) {
- task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
- task->tk_msg.rpc_resp);
- }
+ task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
+ task->tk_msg.rpc_resp);
+
dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
@@ -2416,7 +2406,7 @@ rpc_verify_header(struct rpc_task *task)
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retry garbled creds\n",
task->tk_pid, __func__);
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
goto out_retry;
case RPC_AUTH_TOOWEAK:
printk(KERN_NOTICE "RPC: server %s requires stronger "
@@ -2485,7 +2475,7 @@ out_garbage:
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retrying\n",
task->tk_pid, __func__);
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
out_retry:
return ERR_PTR(-EAGAIN);
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3fe5d60ab0e2..57ca5bead1cb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,65 +99,79 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
- struct list_head *q = &queue->tasks[queue->priority];
- struct rpc_task *task;
-
- if (!list_empty(q)) {
- task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
- if (task->tk_owner == queue->owner)
- list_move_tail(&task->u.tk_wait.list, q);
- }
-}
-
static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
if (queue->priority != priority) {
- /* Fairness: rotate the list when changing priority */
- rpc_rotate_queue_owner(queue);
queue->priority = priority;
+ queue->nr = 1U << priority;
}
}
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
- queue->owner = pid;
- queue->nr = RPC_BATCH_COUNT;
-}
-
static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_owner(queue, 0);
}
/*
- * Add new request to a priority queue.
+ * Add a request to a queue list
*/
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
- struct rpc_task *task,
- unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
{
- struct list_head *q;
struct rpc_task *t;
- INIT_LIST_HEAD(&task->u.tk_wait.links);
- if (unlikely(queue_priority > queue->maxpriority))
- queue_priority = queue->maxpriority;
- if (queue_priority > queue->priority)
- rpc_set_waitqueue_priority(queue, queue_priority);
- q = &queue->tasks[queue_priority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
- list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+ list_add_tail(&task->u.tk_wait.links,
+ &t->u.tk_wait.links);
+ /* Cache the queue head in task->u.tk_wait.list */
+ task->u.tk_wait.list.next = q;
+ task->u.tk_wait.list.prev = NULL;
return;
}
}
+ INIT_LIST_HEAD(&task->u.tk_wait.links);
list_add_tail(&task->u.tk_wait.list, q);
}
/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+ struct list_head *q;
+ struct rpc_task *t;
+
+ if (task->u.tk_wait.list.prev == NULL) {
+ list_del(&task->u.tk_wait.links);
+ return;
+ }
+ if (!list_empty(&task->u.tk_wait.links)) {
+ t = list_first_entry(&task->u.tk_wait.links,
+ struct rpc_task,
+ u.tk_wait.links);
+ /* Assume __rpc_list_enqueue_task() cached the queue head */
+ q = t->u.tk_wait.list.next;
+ list_add_tail(&t->u.tk_wait.list, q);
+ list_del(&task->u.tk_wait.links);
+ }
+ list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
+{
+ if (unlikely(queue_priority > queue->maxpriority))
+ queue_priority = queue->maxpriority;
+ __rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
+/*
* Add new request to wait queue.
*
* Swapper tasks always get inserted at the head of the queue.
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
*/
static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
- struct rpc_task *t;
-
- if (!list_empty(&task->u.tk_wait.links)) {
- t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
- list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
- list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
- }
+ __rpc_list_dequeue_task(task);
}
/*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
__rpc_disable_timer(queue, task);
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
- list_del(&task->u.tk_wait.list);
+ else
+ list_del(&task->u.tk_wait.list);
queue->qlen--;
dprintk("RPC: %5u removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
@@ -440,14 +449,28 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
/*
* Wake up a queued task while the queue lock is being held
*/
-static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
- struct rpc_wait_queue *queue, struct rpc_task *task)
+static struct rpc_task *
+rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
+ struct rpc_wait_queue *queue, struct rpc_task *task,
+ bool (*action)(struct rpc_task *, void *), void *data)
{
if (RPC_IS_QUEUED(task)) {
smp_rmb();
- if (task->tk_waitqueue == queue)
- __rpc_do_wake_up_task_on_wq(wq, queue, task);
+ if (task->tk_waitqueue == queue) {
+ if (action == NULL || action(task, data)) {
+ __rpc_do_wake_up_task_on_wq(wq, queue, task);
+ return task;
+ }
+ }
}
+ return NULL;
+}
+
+static void
+rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
+ struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+ rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
}
/*
@@ -465,6 +488,8 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
struct rpc_wait_queue *queue,
struct rpc_task *task)
{
+ if (!RPC_IS_QUEUED(task))
+ return;
spin_lock_bh(&queue->lock);
rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
spin_unlock_bh(&queue->lock);
@@ -475,12 +500,48 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
*/
void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
+ if (!RPC_IS_QUEUED(task))
+ return;
spin_lock_bh(&queue->lock);
rpc_wake_up_task_queue_locked(queue, task);
spin_unlock_bh(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
+static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
+{
+ task->tk_status = *(int *)status;
+ return true;
+}
+
+static void
+rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
+ struct rpc_task *task, int status)
+{
+ rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+ task, rpc_task_action_set_status, &status);
+}
+
+/**
+ * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
+ * @queue: pointer to rpc_wait_queue
+ * @task: pointer to rpc_task
+ * @status: integer error value
+ *
+ * If @task is queued on @queue, then it is woken up, and @task->tk_status is
+ * set to the value of @status.
+ */
+void
+rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
+ struct rpc_task *task, int status)
+{
+ if (!RPC_IS_QUEUED(task))
+ return;
+ spin_lock_bh(&queue->lock);
+ rpc_wake_up_task_queue_set_status_locked(queue, task, status);
+ spin_unlock_bh(&queue->lock);
+}
+
/*
* Wake up the next task on a priority queue.
*/
@@ -493,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
- if (queue->owner == task->tk_owner) {
- if (--queue->nr)
- goto out;
- list_move_tail(&task->u.tk_wait.list, q);
- }
- /*
- * Check if we need to switch queues.
- */
- goto new_owner;
+ if (!list_empty(q) && --queue->nr) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
}
/*
@@ -515,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
else
q = q - 1;
if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
@@ -525,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
- rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
return task;
}
@@ -553,12 +604,9 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
queue, rpc_qname(queue));
spin_lock_bh(&queue->lock);
task = __rpc_find_next_queued(queue);
- if (task != NULL) {
- if (func(task, data))
- rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
- else
- task = NULL;
- }
+ if (task != NULL)
+ task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
+ task, func, data);
spin_unlock_bh(&queue->lock);
return task;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index f217c348b341..9062967575c4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -26,7 +26,8 @@
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
+static size_t
+xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
@@ -36,7 +37,6 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
desc->offset += len;
return len;
}
-EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -69,7 +69,8 @@ static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to,
* @copy_actor: virtual method for copying data
*
*/
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+static ssize_t
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
@@ -104,7 +105,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
- if (unlikely(*ppage == NULL)) {
+ if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
*ppage = alloc_page(GFP_ATOMIC);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
@@ -140,7 +141,6 @@ copy_tail:
out:
return copied;
}
-EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5185efb9027b..51d36230b6e3 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -171,7 +171,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
- rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net);
strcpy(xprt->xpt_remotebuf, "uninitialized");
}
@@ -895,7 +894,6 @@ int svc_send(struct svc_rqst *rqstp)
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
- rpc_wake_up(&xprt->xpt_bc_pending);
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
@@ -989,7 +987,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
spin_lock(&xprt->xpt_lock);
while (!list_empty(&xprt->xpt_users)) {
u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
- list_del(&u->list);
+ list_del_init(&u->list);
u->callback(u);
}
spin_unlock(&xprt->xpt_lock);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index bb8db3cb8032..775b8c94265b 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -27,12 +27,32 @@
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
-static DEFINE_SPINLOCK(authtab_lock);
-static struct auth_ops *authtab[RPC_AUTH_MAXFLAVOR] = {
- [0] = &svcauth_null,
- [1] = &svcauth_unix,
+static struct auth_ops __rcu *authtab[RPC_AUTH_MAXFLAVOR] = {
+ [RPC_AUTH_NULL] = (struct auth_ops __force __rcu *)&svcauth_null,
+ [RPC_AUTH_UNIX] = (struct auth_ops __force __rcu *)&svcauth_unix,
};
+static struct auth_ops *
+svc_get_auth_ops(rpc_authflavor_t flavor)
+{
+ struct auth_ops *aops;
+
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ return NULL;
+ rcu_read_lock();
+ aops = rcu_dereference(authtab[flavor]);
+ if (aops != NULL && !try_module_get(aops->owner))
+ aops = NULL;
+ rcu_read_unlock();
+ return aops;
+}
+
+static void
+svc_put_auth_ops(struct auth_ops *aops)
+{
+ module_put(aops->owner);
+}
+
int
svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
{
@@ -45,14 +65,11 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
dprintk("svc: svc_authenticate (%d)\n", flavor);
- spin_lock(&authtab_lock);
- if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) ||
- !try_module_get(aops->owner)) {
- spin_unlock(&authtab_lock);
+ aops = svc_get_auth_ops(flavor);
+ if (aops == NULL) {
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
- spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
init_svc_cred(&rqstp->rq_cred);
@@ -82,7 +99,7 @@ int svc_authorise(struct svc_rqst *rqstp)
if (aops) {
rv = aops->release(rqstp);
- module_put(aops->owner);
+ svc_put_auth_ops(aops);
}
return rv;
}
@@ -90,13 +107,14 @@ int svc_authorise(struct svc_rqst *rqstp)
int
svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops)
{
+ struct auth_ops *old;
int rv = -EINVAL;
- spin_lock(&authtab_lock);
- if (flavor < RPC_AUTH_MAXFLAVOR && authtab[flavor] == NULL) {
- authtab[flavor] = aops;
- rv = 0;
+
+ if (flavor < RPC_AUTH_MAXFLAVOR) {
+ old = cmpxchg((struct auth_ops ** __force)&authtab[flavor], NULL, aops);
+ if (old == NULL || old == aops)
+ rv = 0;
}
- spin_unlock(&authtab_lock);
return rv;
}
EXPORT_SYMBOL_GPL(svc_auth_register);
@@ -104,10 +122,8 @@ EXPORT_SYMBOL_GPL(svc_auth_register);
void
svc_auth_unregister(rpc_authflavor_t flavor)
{
- spin_lock(&authtab_lock);
if (flavor < RPC_AUTH_MAXFLAVOR)
- authtab[flavor] = NULL;
- spin_unlock(&authtab_lock);
+ rcu_assign_pointer(authtab[flavor], NULL);
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
@@ -127,10 +143,11 @@ static struct hlist_head auth_domain_table[DN_HASHMAX];
static DEFINE_SPINLOCK(auth_domain_lock);
static void auth_domain_release(struct kref *kref)
+ __releases(&auth_domain_lock)
{
struct auth_domain *dom = container_of(kref, struct auth_domain, ref);
- hlist_del(&dom->hash);
+ hlist_del_rcu(&dom->hash);
dom->flavour->domain_release(dom);
spin_unlock(&auth_domain_lock);
}
@@ -159,7 +176,7 @@ auth_domain_lookup(char *name, struct auth_domain *new)
}
}
if (new)
- hlist_add_head(&new->hash, head);
+ hlist_add_head_rcu(&new->hash, head);
spin_unlock(&auth_domain_lock);
return new;
}
@@ -167,6 +184,21 @@ EXPORT_SYMBOL_GPL(auth_domain_lookup);
struct auth_domain *auth_domain_find(char *name)
{
- return auth_domain_lookup(name, NULL);
+ struct auth_domain *hp;
+ struct hlist_head *head;
+
+ head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(hp, head, hash) {
+ if (strcmp(hp->name, name)==0) {
+ if (!kref_get_unless_zero(&hp->ref))
+ hp = NULL;
+ rcu_read_unlock();
+ return hp;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
}
EXPORT_SYMBOL_GPL(auth_domain_find);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index af7f28fb8102..fb9041b92f72 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,20 +37,26 @@ struct unix_domain {
extern struct auth_ops svcauth_null;
extern struct auth_ops svcauth_unix;
-static void svcauth_unix_domain_release(struct auth_domain *dom)
+static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
{
+ struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
struct unix_domain *ud = container_of(dom, struct unix_domain, h);
kfree(dom->name);
kfree(ud);
}
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+ call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu);
+}
+
struct auth_domain *unix_domain_find(char *name)
{
struct auth_domain *rv;
struct unix_domain *new = NULL;
- rv = auth_domain_lookup(name, NULL);
+ rv = auth_domain_find(name);
while(1) {
if (rv) {
if (new && rv != &new->h)
@@ -91,6 +97,7 @@ struct ip_map {
char m_class[8]; /* e.g. "nfsd" */
struct in6_addr m_addr;
struct unix_domain *m_client;
+ struct rcu_head m_rcu;
};
static void ip_map_put(struct kref *kref)
@@ -101,7 +108,7 @@ static void ip_map_put(struct kref *kref)
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
auth_domain_put(&im->m_client->h);
- kfree(im);
+ kfree_rcu(im, m_rcu);
}
static inline int hash_ip6(const struct in6_addr *ip)
@@ -280,9 +287,9 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
strcpy(ip.m_class, class);
ip.m_addr = *addr;
- ch = sunrpc_cache_lookup(cd, &ip.h,
- hash_str(class, IP_HASHBITS) ^
- hash_ip6(addr));
+ ch = sunrpc_cache_lookup_rcu(cd, &ip.h,
+ hash_str(class, IP_HASHBITS) ^
+ hash_ip6(addr));
if (ch)
return container_of(ch, struct ip_map, h);
@@ -412,6 +419,7 @@ struct unix_gid {
struct cache_head h;
kuid_t uid;
struct group_info *gi;
+ struct rcu_head rcu;
};
static int unix_gid_hash(kuid_t uid)
@@ -426,7 +434,7 @@ static void unix_gid_put(struct kref *kref)
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
- kfree(ug);
+ kfree_rcu(ug, rcu);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
@@ -619,7 +627,7 @@ static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid)
struct cache_head *ch;
ug.uid = uid;
- ch = sunrpc_cache_lookup(cd, &ug.h, unix_gid_hash(uid));
+ ch = sunrpc_cache_lookup_rcu(cd, &ug.h, unix_gid_hash(uid));
if (ch)
return container_of(ch, struct unix_gid, h);
else
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145e639c..3b525accaa68 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -325,59 +325,34 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
/*
* Generic recvfrom routine.
*/
-static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
- int buflen)
+static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
+ unsigned int nr, size_t buflen, unsigned int base)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
- struct msghdr msg = {
- .msg_flags = MSG_DONTWAIT,
- };
- int len;
+ struct msghdr msg = { NULL };
+ ssize_t len;
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
- len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
+ if (base != 0) {
+ iov_iter_advance(&msg.msg_iter, base);
+ buflen -= base;
+ }
+ len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- dprintk("svc: socket %p recvfrom(%p, %zu) = %d\n",
+ dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
-static int svc_partial_recvfrom(struct svc_rqst *rqstp,
- struct kvec *iov, int nr,
- int buflen, unsigned int base)
-{
- size_t save_iovlen;
- void *save_iovbase;
- unsigned int i;
- int ret;
-
- if (base == 0)
- return svc_recvfrom(rqstp, iov, nr, buflen);
-
- for (i = 0; i < nr; i++) {
- if (iov[i].iov_len > base)
- break;
- base -= iov[i].iov_len;
- }
- save_iovlen = iov[i].iov_len;
- save_iovbase = iov[i].iov_base;
- iov[i].iov_len -= base;
- iov[i].iov_base += base;
- ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
- iov[i].iov_len = save_iovlen;
- iov[i].iov_base = save_iovbase;
- return ret;
-}
-
/*
* Set socket snd and rcv buffer lengths
*/
@@ -962,7 +937,8 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
iov.iov_len = want;
- if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
+ len = svc_recvfrom(rqstp, &iov, 1, want, 0);
+ if (len < 0)
goto error;
svsk->sk_tcplen += len;
@@ -1004,7 +980,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
if (!bc_xprt)
return -EAGAIN;
- spin_lock(&bc_xprt->recv_lock);
+ spin_lock(&bc_xprt->queue_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
goto unlock_notfound;
@@ -1022,7 +998,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
rqstp->rq_arg.len = 0;
- spin_unlock(&bc_xprt->recv_lock);
+ spin_unlock(&bc_xprt->queue_lock);
return 0;
unlock_notfound:
printk(KERN_NOTICE
@@ -1031,7 +1007,7 @@ unlock_notfound:
__func__, ntohl(calldir),
bc_xprt, ntohl(xid));
unlock_eagain:
- spin_unlock(&bc_xprt->recv_lock);
+ spin_unlock(&bc_xprt->queue_lock);
return -EAGAIN;
}
@@ -1088,14 +1064,13 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
vec = rqstp->rq_vec;
- pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
- svsk->sk_datalen + want);
+ pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
rqstp->rq_respages = &rqstp->rq_pages[pnum];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Now receive data */
- len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
+ len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
if (len >= 0) {
svsk->sk_tcplen += len;
svsk->sk_datalen += len;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 30afbd236656..2bbb8d38d2bf 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/bvec.h>
/*
* XDR functions for basic NFS types
@@ -128,6 +129,39 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
}
EXPORT_SYMBOL_GPL(xdr_terminate_string);
+size_t
+xdr_buf_pagecount(struct xdr_buf *buf)
+{
+ if (!buf->page_len)
+ return 0;
+ return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+int
+xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
+{
+ size_t i, n = xdr_buf_pagecount(buf);
+
+ if (n != 0 && buf->bvec == NULL) {
+ buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
+ if (!buf->bvec)
+ return -ENOMEM;
+ for (i = 0; i < n; i++) {
+ buf->bvec[i].bv_page = buf->pages[i];
+ buf->bvec[i].bv_len = PAGE_SIZE;
+ buf->bvec[i].bv_offset = 0;
+ }
+ }
+ return 0;
+}
+
+void
+xdr_free_bvec(struct xdr_buf *buf)
+{
+ kfree(buf->bvec);
+ buf->bvec = NULL;
+}
+
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a8db2e3f8904..86bea4520c4d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,8 +68,6 @@
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_connect_status(struct rpc_task *task);
-static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
@@ -171,6 +169,17 @@ out:
}
EXPORT_SYMBOL_GPL(xprt_load_transport);
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+ xprt->snd_task = NULL;
+ if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+ smp_mb__before_atomic();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_atomic();
+ } else
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+}
+
/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
@@ -183,44 +192,53 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_sleep;
}
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
xprt->snd_task = task;
- if (req != NULL)
- req->rq_ntrans++;
return 1;
+out_unlock:
+ xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n",
task->tk_pid, xprt);
- task->tk_timeout = 0;
+ task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- if (req == NULL)
- priority = RPC_PRIORITY_LOW;
- else if (!req->rq_ntrans)
- priority = RPC_PRIORITY_NORMAL;
- else
- priority = RPC_PRIORITY_HIGH;
- rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
-static void xprt_clear_locked(struct rpc_xprt *xprt)
+static bool
+xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
{
- xprt->snd_task = NULL;
- if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- smp_mb__before_atomic();
- clear_bit(XPRT_LOCKED, &xprt->state);
- smp_mb__after_atomic();
- } else
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ return test_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_set_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (!list_empty(&xprt->xmit_queue)) {
+ /* Peek at head of queue to see if it can make progress */
+ if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst,
+ rq_xmit)->rq_cong)
+ return;
+ }
+ set_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (!RPCXPRT_CONGESTED(xprt))
+ clear_bit(XPRT_CWND_WAIT, &xprt->state);
}
/*
@@ -230,11 +248,11 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
* Same as xprt_reserve_xprt, but Van Jacobson congestion control is
* integrated into the decision of whether a request is allowed to be
* woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
*/
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
@@ -245,25 +263,19 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
xprt->snd_task = task;
return 1;
}
- if (__xprt_get_cong(xprt, task)) {
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
+ if (!xprt_need_congestion_window_wait(xprt)) {
xprt->snd_task = task;
- req->rq_ntrans++;
return 1;
}
+out_unlock:
xprt_clear_locked(xprt);
out_sleep:
- if (req)
- __xprt_put_cong(xprt, req);
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
- task->tk_timeout = 0;
+ task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- if (req == NULL)
- priority = RPC_PRIORITY_LOW;
- else if (!req->rq_ntrans)
- priority = RPC_PRIORITY_NORMAL;
- else
- priority = RPC_PRIORITY_HIGH;
- rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -272,6 +284,8 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
int retval;
+ if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
+ return 1;
spin_lock_bh(&xprt->transport_lock);
retval = xprt->ops->reserve_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
@@ -281,12 +295,8 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
{
struct rpc_xprt *xprt = data;
- struct rpc_rqst *req;
- req = task->tk_rqstp;
xprt->snd_task = task;
- if (req)
- req->rq_ntrans++;
return true;
}
@@ -294,53 +304,30 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
{
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
-
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
__xprt_lock_write_func, xprt))
return;
+out_unlock:
xprt_clear_locked(xprt);
}
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
- struct rpc_xprt *xprt = data;
- struct rpc_rqst *req;
-
- req = task->tk_rqstp;
- if (req == NULL) {
- xprt->snd_task = task;
- return true;
- }
- if (__xprt_get_cong(xprt, task)) {
- xprt->snd_task = task;
- req->rq_ntrans++;
- return true;
- }
- return false;
-}
-
static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
{
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
- if (RPCXPRT_CONGESTED(xprt))
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
+ if (xprt_need_congestion_window_wait(xprt))
goto out_unlock;
if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
- __xprt_lock_write_cong_func, xprt))
+ __xprt_lock_write_func, xprt))
return;
out_unlock:
xprt_clear_locked(xprt);
}
-static void xprt_task_clear_bytes_sent(struct rpc_task *task)
-{
- if (task != NULL) {
- struct rpc_rqst *req = task->tk_rqstp;
- if (req != NULL)
- req->rq_bytes_sent = 0;
- }
-}
-
/**
* xprt_release_xprt - allow other requests to use a transport
* @xprt: transport with other tasks potentially waiting
@@ -351,7 +338,6 @@ static void xprt_task_clear_bytes_sent(struct rpc_task *task)
void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
- xprt_task_clear_bytes_sent(task);
xprt_clear_locked(xprt);
__xprt_lock_write_next(xprt);
}
@@ -369,7 +355,6 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt);
void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
- xprt_task_clear_bytes_sent(task);
xprt_clear_locked(xprt);
__xprt_lock_write_next_cong(xprt);
}
@@ -378,6 +363,8 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
+ if (xprt->snd_task != task)
+ return;
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
@@ -388,16 +375,16 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
* overflowed. Put the task to sleep if this is the case.
*/
static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
-
if (req->rq_cong)
return 1;
dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
- task->tk_pid, xprt->cong, xprt->cwnd);
- if (RPCXPRT_CONGESTED(xprt))
+ req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
+ if (RPCXPRT_CONGESTED(xprt)) {
+ xprt_set_congestion_window_wait(xprt);
return 0;
+ }
req->rq_cong = 1;
xprt->cong += RPC_CWNDSCALE;
return 1;
@@ -414,10 +401,32 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
+ xprt_test_and_clear_congestion_window_wait(xprt);
__xprt_lock_write_next_cong(xprt);
}
/**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ bool ret = false;
+
+ if (req->rq_cong)
+ return true;
+ spin_lock_bh(&xprt->transport_lock);
+ ret = __xprt_get_cong(xprt, req) != 0;
+ spin_unlock_bh(&xprt->transport_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
+/**
* xprt_release_rqst_cong - housekeeping when request is complete
* @task: RPC request that recently completed
*
@@ -431,6 +440,20 @@ void xprt_release_rqst_cong(struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
+/*
+ * Clear the congestion window wait flag and wake up the next
+ * entry on xprt->sending
+ */
+static void
+xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
+ spin_lock_bh(&xprt->transport_lock);
+ __xprt_lock_write_next_cong(xprt);
+ spin_unlock_bh(&xprt->transport_lock);
+ }
+}
+
/**
* xprt_adjust_cwnd - adjust transport congestion window
* @xprt: pointer to xprt
@@ -488,39 +511,46 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
/**
* xprt_wait_for_buffer_space - wait for transport output buffer to clear
- * @task: task to be put to sleep
- * @action: function pointer to be executed after wait
+ * @xprt: transport
*
* Note that we only set the timer for the case of RPC_IS_SOFT(), since
* we don't in general want to force a socket disconnection due to
* an incomplete RPC call transmission.
*/
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt)
{
- struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_xprt *xprt = req->rq_xprt;
-
- task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
- rpc_sleep_on(&xprt->pending, task, action);
+ set_bit(XPRT_WRITE_SPACE, &xprt->state);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
+static bool
+xprt_clear_write_space_locked(struct rpc_xprt *xprt)
+{
+ if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) {
+ __xprt_lock_write_next(xprt);
+ dprintk("RPC: write space: waking waiting task on "
+ "xprt %p\n", xprt);
+ return true;
+ }
+ return false;
+}
+
/**
* xprt_write_space - wake the task waiting for transport output buffer space
* @xprt: transport with waiting tasks
*
* Can be called in a soft IRQ context, so xprt_write_space never sleeps.
*/
-void xprt_write_space(struct rpc_xprt *xprt)
+bool xprt_write_space(struct rpc_xprt *xprt)
{
+ bool ret;
+
+ if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ return false;
spin_lock_bh(&xprt->transport_lock);
- if (xprt->snd_task) {
- dprintk("RPC: write space: waking waiting task on "
- "xprt %p\n", xprt);
- rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
- &xprt->pending, xprt->snd_task);
- }
+ ret = xprt_clear_write_space_locked(xprt);
spin_unlock_bh(&xprt->transport_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(xprt_write_space);
@@ -631,6 +661,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
+ xprt_clear_write_space_locked(xprt);
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
@@ -654,6 +685,22 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
+static unsigned int
+xprt_connect_cookie(struct rpc_xprt *xprt)
+{
+ return READ_ONCE(xprt->connect_cookie);
+}
+
+static bool
+xprt_request_retransmit_after_disconnect(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ return req->rq_connect_cookie != xprt_connect_cookie(xprt) ||
+ !xprt_connected(xprt);
+}
+
/**
* xprt_conditional_disconnect - force a transport to disconnect
* @xprt: transport to disconnect
@@ -692,7 +739,7 @@ static void
xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
__must_hold(&xprt->transport_lock)
{
- if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+ if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
}
@@ -702,7 +749,7 @@ xprt_init_autodisconnect(struct timer_list *t)
struct rpc_xprt *xprt = from_timer(xprt, t, timer);
spin_lock(&xprt->transport_lock);
- if (!list_empty(&xprt->recv))
+ if (!RB_EMPTY_ROOT(&xprt->recv_queue))
goto out_abort;
/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
xprt->last_used = jiffies;
@@ -726,7 +773,6 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
goto out;
if (xprt->snd_task != task)
goto out;
- xprt_task_clear_bytes_sent(task);
xprt->snd_task = cookie;
ret = true;
out:
@@ -772,7 +818,6 @@ void xprt_connect(struct rpc_task *task)
xprt->ops->close(xprt);
if (!xprt_connected(xprt)) {
- task->tk_rqstp->rq_bytes_sent = 0;
task->tk_timeout = task->tk_rqstp->rq_timeout;
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
@@ -789,17 +834,11 @@ void xprt_connect(struct rpc_task *task)
static void xprt_connect_status(struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
-
- if (task->tk_status == 0) {
- xprt->stat.connect_count++;
- xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
+ switch (task->tk_status) {
+ case 0:
dprintk("RPC: %5u xprt_connect_status: connection established\n",
task->tk_pid);
- return;
- }
-
- switch (task->tk_status) {
+ break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
@@ -816,28 +855,97 @@ static void xprt_connect_status(struct rpc_task *task)
default:
dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
"server %s\n", task->tk_pid, -task->tk_status,
- xprt->servername);
+ task->tk_rqstp->rq_xprt->servername);
task->tk_status = -EIO;
}
}
+enum xprt_xid_rb_cmp {
+ XID_RB_EQUAL,
+ XID_RB_LEFT,
+ XID_RB_RIGHT,
+};
+static enum xprt_xid_rb_cmp
+xprt_xid_cmp(__be32 xid1, __be32 xid2)
+{
+ if (xid1 == xid2)
+ return XID_RB_EQUAL;
+ if ((__force u32)xid1 < (__force u32)xid2)
+ return XID_RB_LEFT;
+ return XID_RB_RIGHT;
+}
+
+static struct rpc_rqst *
+xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid)
+{
+ struct rb_node *n = xprt->recv_queue.rb_node;
+ struct rpc_rqst *req;
+
+ while (n != NULL) {
+ req = rb_entry(n, struct rpc_rqst, rq_recv);
+ switch (xprt_xid_cmp(xid, req->rq_xid)) {
+ case XID_RB_LEFT:
+ n = n->rb_left;
+ break;
+ case XID_RB_RIGHT:
+ n = n->rb_right;
+ break;
+ case XID_RB_EQUAL:
+ return req;
+ }
+ }
+ return NULL;
+}
+
+static void
+xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new)
+{
+ struct rb_node **p = &xprt->recv_queue.rb_node;
+ struct rb_node *n = NULL;
+ struct rpc_rqst *req;
+
+ while (*p != NULL) {
+ n = *p;
+ req = rb_entry(n, struct rpc_rqst, rq_recv);
+ switch(xprt_xid_cmp(new->rq_xid, req->rq_xid)) {
+ case XID_RB_LEFT:
+ p = &n->rb_left;
+ break;
+ case XID_RB_RIGHT:
+ p = &n->rb_right;
+ break;
+ case XID_RB_EQUAL:
+ WARN_ON_ONCE(new != req);
+ return;
+ }
+ }
+ rb_link_node(&new->rq_recv, n, p);
+ rb_insert_color(&new->rq_recv, &xprt->recv_queue);
+}
+
+static void
+xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ rb_erase(&req->rq_recv, &xprt->recv_queue);
+}
+
/**
* xprt_lookup_rqst - find an RPC request corresponding to an XID
* @xprt: transport on which the original request was transmitted
* @xid: RPC XID of incoming reply
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *entry;
- list_for_each_entry(entry, &xprt->recv, rq_list)
- if (entry->rq_xid == xid) {
- trace_xprt_lookup_rqst(xprt, xid, 0);
- entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
- return entry;
- }
+ entry = xprt_request_rb_find(xprt, xid);
+ if (entry != NULL) {
+ trace_xprt_lookup_rqst(xprt, xid, 0);
+ entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
+ return entry;
+ }
dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
ntohl(xid));
@@ -847,16 +955,22 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
}
EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+static bool
+xprt_is_pinned_rqst(struct rpc_rqst *req)
+{
+ return atomic_read(&req->rq_pin) != 0;
+}
+
/**
* xprt_pin_rqst - Pin a request on the transport receive list
* @req: Request to pin
*
* Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt transport lock.
+ * so should be holding the xprt receive lock.
*/
void xprt_pin_rqst(struct rpc_rqst *req)
{
- set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+ atomic_inc(&req->rq_pin);
}
EXPORT_SYMBOL_GPL(xprt_pin_rqst);
@@ -864,38 +978,87 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
* xprt_unpin_rqst - Unpin a request on the transport receive list
* @req: Request to pin
*
- * Caller should be holding the xprt transport lock.
+ * Caller should be holding the xprt receive lock.
*/
void xprt_unpin_rqst(struct rpc_rqst *req)
{
- struct rpc_task *task = req->rq_task;
-
- clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
- if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
- wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+ if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) {
+ atomic_dec(&req->rq_pin);
+ return;
+ }
+ if (atomic_dec_and_test(&req->rq_pin))
+ wake_up_var(&req->rq_pin);
}
EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
-__must_hold(&req->rq_xprt->recv_lock)
{
- struct rpc_task *task = req->rq_task;
+ wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
+}
- if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
- spin_unlock(&req->rq_xprt->recv_lock);
- set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
- wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
- TASK_UNINTERRUPTIBLE);
- clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
- spin_lock(&req->rq_xprt->recv_lock);
- }
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+ return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+ READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0;
+}
+
+static bool
+xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
+{
+ return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+ READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0;
+}
+
+/**
+ * xprt_request_enqueue_receive - Add an request to the receive queue
+ * @task: RPC task
+ *
+ */
+void
+xprt_request_enqueue_receive(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (!xprt_request_need_enqueue_receive(task, req))
+ return;
+ spin_lock(&xprt->queue_lock);
+
+ /* Update the softirq receive buffer */
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+ sizeof(req->rq_private_buf));
+
+ /* Add request to the receive list */
+ xprt_request_rb_insert(xprt, req);
+ set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+
+ xprt_reset_majortimeo(req);
+ /* Turn off autodisconnect */
+ del_singleshot_timer_sync(&xprt->timer);
+}
+
+/**
+ * xprt_request_dequeue_receive_locked - Remove a request from the receive queue
+ * @task: RPC task
+ *
+ * Caller must hold xprt->queue_lock.
+ */
+static void
+xprt_request_dequeue_receive_locked(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+ xprt_request_rb_remove(req->rq_xprt, req);
}
/**
* xprt_update_rtt - Update RPC RTT statistics
* @task: RPC request that recently completed
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
void xprt_update_rtt(struct rpc_task *task)
{
@@ -917,7 +1080,7 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt);
* @task: RPC request that recently completed
* @copied: actual number of bytes received from the transport
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
@@ -930,12 +1093,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
xprt->stat.recvs++;
- list_del_init(&req->rq_list);
req->rq_private_buf.len = copied;
/* Ensure all writes are done before we update */
/* req->rq_reply_bytes_recvd */
smp_wmb();
req->rq_reply_bytes_recvd = copied;
+ xprt_request_dequeue_receive_locked(task);
rpc_wake_up_queued_task(&xprt->pending, task);
}
EXPORT_SYMBOL_GPL(xprt_complete_rqst);
@@ -957,6 +1120,172 @@ static void xprt_timer(struct rpc_task *task)
}
/**
+ * xprt_request_wait_receive - wait for the reply to an RPC request
+ * @task: RPC task about to send a request
+ *
+ */
+void xprt_request_wait_receive(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+ return;
+ /*
+ * Sleep on the pending queue if we're expecting a reply.
+ * The spinlock ensures atomicity between the test of
+ * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
+ */
+ spin_lock(&xprt->queue_lock);
+ if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
+ xprt->ops->set_retrans_timeout(task);
+ rpc_sleep_on(&xprt->pending, task, xprt_timer);
+ /*
+ * Send an extra queue wakeup call if the
+ * connection was dropped in case the call to
+ * rpc_sleep_on() raced.
+ */
+ if (xprt_request_retransmit_after_disconnect(task))
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ task, -ENOTCONN);
+ }
+ spin_unlock(&xprt->queue_lock);
+}
+
+static bool
+xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
+{
+ return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+/**
+ * xprt_request_enqueue_transmit - queue a task for transmission
+ * @task: pointer to rpc_task
+ *
+ * Add a task to the transmission queue.
+ */
+void
+xprt_request_enqueue_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *pos, *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (xprt_request_need_enqueue_transmit(task, req)) {
+ spin_lock(&xprt->queue_lock);
+ /*
+ * Requests that carry congestion control credits are added
+ * to the head of the list to avoid starvation issues.
+ */
+ if (req->rq_cong) {
+ xprt_clear_congestion_window_wait(xprt);
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_cong)
+ continue;
+ /* Note: req is added _before_ pos */
+ list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+ goto out;
+ }
+ } else if (RPC_IS_SWAPPER(task)) {
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_cong || pos->rq_bytes_sent)
+ continue;
+ if (RPC_IS_SWAPPER(pos->rq_task))
+ continue;
+ /* Note: req is added _before_ pos */
+ list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+ goto out;
+ }
+ } else {
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_task->tk_owner != task->tk_owner)
+ continue;
+ list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+ INIT_LIST_HEAD(&req->rq_xmit);
+ goto out;
+ }
+ }
+ list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+out:
+ set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+ }
+}
+
+/**
+ * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ * Caller must hold xprt->queue_lock
+ */
+static void
+xprt_request_dequeue_transmit_locked(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ return;
+ if (!list_empty(&req->rq_xmit)) {
+ list_del(&req->rq_xmit);
+ if (!list_empty(&req->rq_xmit2)) {
+ struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
+ struct rpc_rqst, rq_xmit2);
+ list_del(&req->rq_xmit2);
+ list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
+ }
+ } else
+ list_del(&req->rq_xmit2);
+}
+
+/**
+ * xprt_request_dequeue_transmit - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ */
+static void
+xprt_request_dequeue_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_request_dequeue_transmit_locked(task);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * xprt_request_prepare - prepare an encoded request for transport
+ * @req: pointer to rpc_rqst
+ *
+ * Calls into the transport layer to do whatever is needed to prepare
+ * the request for transmission or receive.
+ */
+void
+xprt_request_prepare(struct rpc_rqst *req)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (xprt->ops->prepare_request)
+ xprt->ops->prepare_request(req);
+}
+
+/**
+ * xprt_request_need_retransmit - Test if a task needs retransmission
+ * @task: pointer to rpc_task
+ *
+ * Test for whether a connection breakage requires the task to retransmit
+ */
+bool
+xprt_request_need_retransmit(struct rpc_task *task)
+{
+ return xprt_request_retransmit_after_disconnect(task);
+}
+
+/**
* xprt_prepare_transmit - reserve the transport before sending a request
* @task: RPC task about to send a request
*
@@ -965,32 +1294,18 @@ bool xprt_prepare_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- bool ret = false;
dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
- spin_lock_bh(&xprt->transport_lock);
- if (!req->rq_bytes_sent) {
- if (req->rq_reply_bytes_recvd) {
- task->tk_status = req->rq_reply_bytes_recvd;
- goto out_unlock;
- }
- if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
- && xprt_connected(xprt)
- && req->rq_connect_cookie == xprt->connect_cookie) {
- xprt->ops->set_retrans_timeout(task);
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- goto out_unlock;
- }
- }
- if (!xprt->ops->reserve_xprt(xprt, task)) {
- task->tk_status = -EAGAIN;
- goto out_unlock;
+ if (!xprt_lock_write(xprt, task)) {
+ /* Race breaker: someone may have transmitted us */
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ rpc_wake_up_queued_task_set_status(&xprt->sending,
+ task, 0);
+ return false;
+
}
- ret = true;
-out_unlock:
- spin_unlock_bh(&xprt->transport_lock);
- return ret;
+ return true;
}
void xprt_end_transmit(struct rpc_task *task)
@@ -999,54 +1314,62 @@ void xprt_end_transmit(struct rpc_task *task)
}
/**
- * xprt_transmit - send an RPC request on a transport
- * @task: controlling RPC task
+ * xprt_request_transmit - send an RPC request on a transport
+ * @req: pointer to request to transmit
+ * @snd_task: RPC task that owns the transport lock
*
- * We have to copy the iovec because sendmsg fiddles with its contents.
+ * This performs the transmission of a single request.
+ * Note that if the request is not the same as snd_task, then it
+ * does need to be pinned.
+ * Returns '0' on success.
*/
-void xprt_transmit(struct rpc_task *task)
+static int
+xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
{
- struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_xprt *xprt = req->rq_xprt;
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct rpc_task *task = req->rq_task;
unsigned int connect_cookie;
+ int is_retrans = RPC_WAS_SENT(task);
int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
- if (!req->rq_reply_bytes_recvd) {
- if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
- /*
- * Add to the list only if we're expecting a reply
- */
- /* Update the softirq receive buffer */
- memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
- sizeof(req->rq_private_buf));
- /* Add request to the receive list */
- spin_lock(&xprt->recv_lock);
- list_add_tail(&req->rq_list, &xprt->recv);
- spin_unlock(&xprt->recv_lock);
- xprt_reset_majortimeo(req);
- /* Turn off autodisconnect */
- del_singleshot_timer_sync(&xprt->timer);
+ if (!req->rq_bytes_sent) {
+ if (xprt_request_data_received(task)) {
+ status = 0;
+ goto out_dequeue;
}
- } else if (!req->rq_bytes_sent)
- return;
+ /* Verify that our message lies in the RPCSEC_GSS window */
+ if (rpcauth_xmit_need_reencode(task)) {
+ status = -EBADMSG;
+ goto out_dequeue;
+ }
+ }
+
+ /*
+ * Update req->rq_ntrans before transmitting to avoid races with
+ * xprt_update_rtt(), which needs to know that it is recording a
+ * reply to the first transmission.
+ */
+ req->rq_ntrans++;
connect_cookie = xprt->connect_cookie;
- status = xprt->ops->send_request(task);
+ status = xprt->ops->send_request(req);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
- task->tk_status = status;
- return;
+ req->rq_ntrans--;
+ return status;
}
+
+ if (is_retrans)
+ task->tk_client->cl_stats->rpcretrans++;
+
xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
spin_lock_bh(&xprt->transport_lock);
- xprt->ops->set_retrans_timeout(task);
-
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1055,25 +1378,49 @@ void xprt_transmit(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
req->rq_connect_cookie = connect_cookie;
- if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
- /*
- * Sleep on the pending queue if we're expecting a reply.
- * The spinlock ensures atomicity between the test of
- * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
- */
- spin_lock(&xprt->recv_lock);
- if (!req->rq_reply_bytes_recvd) {
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- /*
- * Send an extra queue wakeup call if the
- * connection was dropped in case the call to
- * rpc_sleep_on() raced.
- */
- if (!xprt_connected(xprt))
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
- }
- spin_unlock(&xprt->recv_lock);
+out_dequeue:
+ xprt_request_dequeue_transmit(task);
+ rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
+ return status;
+}
+
+/**
+ * xprt_transmit - send an RPC request on a transport
+ * @task: controlling RPC task
+ *
+ * Attempts to drain the transmit queue. On exit, either the transport
+ * signalled an error that needs to be handled before transmission can
+ * resume, or @task finished transmitting, and detected that it already
+ * received a reply.
+ */
+void
+xprt_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *next, *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+ int status;
+
+ spin_lock(&xprt->queue_lock);
+ while (!list_empty(&xprt->xmit_queue)) {
+ next = list_first_entry(&xprt->xmit_queue,
+ struct rpc_rqst, rq_xmit);
+ xprt_pin_rqst(next);
+ spin_unlock(&xprt->queue_lock);
+ status = xprt_request_transmit(next, task);
+ if (status == -EBADMSG && next != req)
+ status = 0;
+ cond_resched();
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(next);
+ if (status == 0) {
+ if (!xprt_request_data_received(task) ||
+ test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ continue;
+ } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ task->tk_status = status;
+ break;
}
+ spin_unlock(&xprt->queue_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1170,20 +1517,6 @@ out_init_req:
}
EXPORT_SYMBOL_GPL(xprt_alloc_slot);
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- /* Note: grabbing the xprt_lock_write() ensures that we throttle
- * new slot allocation if the transport is congested (i.e. when
- * reconnecting a stream transport or when out of socket write
- * buffer space).
- */
- if (xprt_lock_write(xprt, task)) {
- xprt_alloc_slot(xprt, task);
- xprt_release_write(xprt, task);
- }
-}
-EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-
void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
spin_lock(&xprt->reserve_lock);
@@ -1250,6 +1583,60 @@ void xprt_free(struct rpc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(xprt_free);
+static void
+xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
+{
+ req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+}
+
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+ __be32 xid;
+
+ spin_lock(&xprt->reserve_lock);
+ xid = (__force __be32)xprt->xid++;
+ spin_unlock(&xprt->reserve_lock);
+ return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+ xprt->xid = prandom_u32();
+}
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+ req->rq_task = task;
+ req->rq_xprt = xprt;
+ req->rq_buffer = NULL;
+ req->rq_xid = xprt_alloc_xid(xprt);
+ xprt_init_connect_cookie(req, xprt);
+ req->rq_bytes_sent = 0;
+ req->rq_snd_buf.len = 0;
+ req->rq_snd_buf.buflen = 0;
+ req->rq_rcv_buf.len = 0;
+ req->rq_rcv_buf.buflen = 0;
+ req->rq_release_snd_buf = NULL;
+ xprt_reset_majortimeo(req);
+ dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+ req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ xprt->ops->alloc_slot(xprt, task);
+ if (task->tk_rqstp != NULL)
+ xprt_request_init(task);
+}
+
/**
* xprt_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
@@ -1269,7 +1656,7 @@ void xprt_reserve(struct rpc_task *task)
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (!xprt_throttle_congested(xprt, task))
- xprt->ops->alloc_slot(xprt, task);
+ xprt_do_reserve(xprt, task);
}
/**
@@ -1291,45 +1678,29 @@ void xprt_retry_reserve(struct rpc_task *task)
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
- xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
- __be32 xid;
-
- spin_lock(&xprt->reserve_lock);
- xid = (__force __be32)xprt->xid++;
- spin_unlock(&xprt->reserve_lock);
- return xid;
+ xprt_do_reserve(xprt, task);
}
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
- xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
+static void
+xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
{
- struct rpc_xprt *xprt = task->tk_xprt;
- struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
- INIT_LIST_HEAD(&req->rq_list);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
- req->rq_task = task;
- req->rq_xprt = xprt;
- req->rq_buffer = NULL;
- req->rq_xid = xprt_alloc_xid(xprt);
- req->rq_connect_cookie = xprt->connect_cookie - 1;
- req->rq_bytes_sent = 0;
- req->rq_snd_buf.len = 0;
- req->rq_snd_buf.buflen = 0;
- req->rq_rcv_buf.len = 0;
- req->rq_rcv_buf.buflen = 0;
- req->rq_release_snd_buf = NULL;
- xprt_reset_majortimeo(req);
- dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
- req, ntohl(req->rq_xid));
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+ test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+ xprt_is_pinned_rqst(req)) {
+ spin_lock(&xprt->queue_lock);
+ xprt_request_dequeue_transmit_locked(task);
+ xprt_request_dequeue_receive_locked(task);
+ while (xprt_is_pinned_rqst(req)) {
+ set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+ xprt_wait_on_pinned_rqst(req);
+ spin_lock(&xprt->queue_lock);
+ clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+ }
+ spin_unlock(&xprt->queue_lock);
+ }
}
/**
@@ -1345,8 +1716,7 @@ void xprt_release(struct rpc_task *task)
if (req == NULL) {
if (task->tk_client) {
xprt = task->tk_xprt;
- if (xprt->snd_task == task)
- xprt_release_write(xprt, task);
+ xprt_release_write(xprt, task);
}
return;
}
@@ -1356,12 +1726,7 @@ void xprt_release(struct rpc_task *task)
task->tk_ops->rpc_count_stats(task, task->tk_calldata);
else if (task->tk_client)
rpc_count_iostats(task, task->tk_client->cl_metrics);
- spin_lock(&xprt->recv_lock);
- if (!list_empty(&req->rq_list)) {
- list_del_init(&req->rq_list);
- xprt_wait_on_pinned_rqst(req);
- }
- spin_unlock(&xprt->recv_lock);
+ xprt_request_dequeue_all(task, req);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
@@ -1372,6 +1737,7 @@ void xprt_release(struct rpc_task *task)
if (req->rq_buffer)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
+ xdr_free_bvec(&req->rq_rcv_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
@@ -1385,16 +1751,36 @@ void xprt_release(struct rpc_task *task)
xprt_free_bc_request(req);
}
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+void
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+{
+ struct xdr_buf *xbufp = &req->rq_snd_buf;
+
+ task->tk_rqstp = req;
+ req->rq_task = task;
+ xprt_init_connect_cookie(req, req->rq_xprt);
+ /*
+ * Set up the xdr_buf length.
+ * This also indicates that the buffer is XDR encoded already.
+ */
+ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+ xbufp->tail[0].iov_len;
+ req->rq_bytes_sent = 0;
+}
+#endif
+
static void xprt_init(struct rpc_xprt *xprt, struct net *net)
{
kref_init(&xprt->kref);
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
- spin_lock_init(&xprt->recv_lock);
+ spin_lock_init(&xprt->queue_lock);
INIT_LIST_HEAD(&xprt->free);
- INIT_LIST_HEAD(&xprt->recv);
+ xprt->recv_queue = RB_ROOT;
+ INIT_LIST_HEAD(&xprt->xmit_queue);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
spin_lock_init(&xprt->bc_pa_lock);
INIT_LIST_HEAD(&xprt->bc_pa_list);
@@ -1407,7 +1793,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
- rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+ rpc_init_wait_queue(&xprt->sending, "xprt_sending");
rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
xprt_init_xid(xprt);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 90adeff4c06b..e5b367a3e517 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -51,12 +51,11 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
rqst = &req->rl_slot;
rqst->rq_xprt = xprt;
- INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
@@ -201,6 +200,9 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (!xprt_connected(rqst->rq_xprt))
goto drop_connection;
+ if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+ return -EBADSLT;
+
rc = rpcrdma_bc_marshal_reply(rqst);
if (rc < 0)
goto failed_marshal;
@@ -228,16 +230,16 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpc_rqst *rqst, *tmp;
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
list_del(&rqst->rq_bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
rpcrdma_bc_free_rqst(r_xprt, rqst);
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
}
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
}
/**
@@ -255,9 +257,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
rpcrdma_recv_buffer_put(req->rl_reply);
req->rl_reply = NULL;
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
}
/**
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 0f7c465d9a5a..7f5632cd5a48 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -49,46 +49,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
return true;
}
-static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
- static struct ib_fmr_attr fmr_attr = {
- .max_pages = RPCRDMA_MAX_FMR_SGES,
- .max_maps = 1,
- .page_shift = PAGE_SHIFT
- };
-
- mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(u64), GFP_KERNEL);
- if (!mr->fmr.fm_physaddrs)
- goto out_free;
-
- mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mr->mr_sg), GFP_KERNEL);
- if (!mr->mr_sg)
- goto out_free;
-
- sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
-
- mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
- &fmr_attr);
- if (IS_ERR(mr->fmr.fm_mr))
- goto out_fmr_err;
-
- INIT_LIST_HEAD(&mr->mr_list);
- return 0;
-
-out_fmr_err:
- dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mr->fmr.fm_mr));
-
-out_free:
- kfree(mr->mr_sg);
- kfree(mr->fmr.fm_physaddrs);
- return -ENOMEM;
-}
-
-static int
+static void
__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
@@ -97,13 +58,16 @@ __fmr_unmap(struct rpcrdma_mr *mr)
list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
list_del(&mr->fmr.fm_mr->list);
- return rc;
+ if (rc)
+ pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
+ mr, rc);
}
+/* Release an MR.
+ */
static void
fmr_op_release_mr(struct rpcrdma_mr *mr)
{
- LIST_HEAD(unmap_list);
int rc;
kfree(mr->fmr.fm_physaddrs);
@@ -112,10 +76,7 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(mr);
- if (rc)
- pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- mr, rc);
+ __fmr_unmap(mr);
rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
@@ -125,40 +86,68 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
kfree(mr);
}
-/* Reset of a single FMR.
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mr *mr)
+fmr_mr_recycle_worker(struct work_struct *work)
{
+ struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- int rc;
- /* ORDER: invalidate first */
- rc = __fmr_unmap(mr);
- if (rc)
- goto out_release;
-
- /* ORDER: then DMA unmap */
- rpcrdma_mr_unmap_and_put(mr);
+ trace_xprtrdma_mr_recycle(mr);
- r_xprt->rx_stats.mrs_recovered++;
- return;
-
-out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
- r_xprt->rx_stats.mrs_orphaned++;
-
- trace_xprtrdma_dma_unmap(mr);
+ trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
+ r_xprt->rx_stats.mrs_recycled++;
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
fmr_op_release_mr(mr);
}
+static int
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+{
+ static struct ib_fmr_attr fmr_attr = {
+ .max_pages = RPCRDMA_MAX_FMR_SGES,
+ .max_maps = 1,
+ .page_shift = PAGE_SHIFT
+ };
+
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(u64), GFP_KERNEL);
+ if (!mr->fmr.fm_physaddrs)
+ goto out_free;
+
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
+ goto out_free;
+
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
+
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ &fmr_attr);
+ if (IS_ERR(mr->fmr.fm_mr))
+ goto out_fmr_err;
+
+ INIT_LIST_HEAD(&mr->mr_list);
+ INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
+ return 0;
+
+out_fmr_err:
+ dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
+ PTR_ERR(mr->fmr.fm_mr));
+
+out_free:
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ return -ENOMEM;
+}
+
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
@@ -187,6 +176,7 @@ fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES);
+ ia->ri_max_segs += 2; /* segments for head and tail buffers */
return 0;
}
@@ -244,7 +234,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
- trace_xprtrdma_dma_map(mr);
+ trace_xprtrdma_mr_map(mr);
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
@@ -305,13 +295,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mr->fmr);
- trace_xprtrdma_localinv(mr);
+ trace_xprtrdma_mr_localinv(mr);
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
- goto out_reset;
+ goto out_release;
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
@@ -324,13 +314,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
return;
-out_reset:
+out_release:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
while (!list_empty(mrs)) {
mr = rpcrdma_mr_pop(mrs);
list_del(&mr->fmr.fm_mr->list);
- fmr_op_recover_mr(mr);
+ rpcrdma_mr_recycle(mr);
}
}
@@ -338,7 +328,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
.ro_send = fmr_op_send,
.ro_unmap_sync = fmr_op_unmap_sync,
- .ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
.ro_init_mr = fmr_op_init_mr,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 1bb00dd6ccdb..fc6378cc0c1c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -97,6 +97,44 @@ out_not_supported:
return false;
}
+static void
+frwr_op_release_mr(struct rpcrdma_mr *mr)
+{
+ int rc;
+
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
+ if (rc)
+ pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
+}
+
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+ struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+
+ trace_xprtrdma_mr_recycle(mr);
+
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_mr_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
+
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ r_xprt->rx_stats.mrs_recycled++;
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
+ frwr_op_release_mr(mr);
+}
+
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
@@ -113,6 +151,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
goto out_list_err;
INIT_LIST_HEAD(&mr->mr_list);
+ INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
sg_init_table(mr->mr_sg, depth);
init_completion(&frwr->fr_linv_done);
return 0;
@@ -131,79 +170,6 @@ out_list_err:
return rc;
}
-static void
-frwr_op_release_mr(struct rpcrdma_mr *mr)
-{
- int rc;
-
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
-}
-
-static int
-__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
- struct rpcrdma_frwr *frwr = &mr->frwr;
- int rc;
-
- rc = ib_dereg_mr(frwr->fr_mr);
- if (rc) {
- pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, mr);
- return rc;
- }
-
- frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frwr_depth);
- if (IS_ERR(frwr->fr_mr)) {
- pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(frwr->fr_mr), mr);
- return PTR_ERR(frwr->fr_mr);
- }
-
- dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
- frwr->fr_state = FRWR_IS_INVALID;
- return 0;
-}
-
-/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
- */
-static void
-frwr_op_recover_mr(struct rpcrdma_mr *mr)
-{
- enum rpcrdma_frwr_state state = mr->frwr.fr_state;
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- int rc;
-
- rc = __frwr_mr_reset(ia, mr);
- if (state != FRWR_FLUSHED_LI) {
- trace_xprtrdma_dma_unmap(mr);
- ib_dma_unmap_sg(ia->ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- }
- if (rc)
- goto out_release;
-
- rpcrdma_mr_put(mr);
- r_xprt->rx_stats.mrs_recovered++;
- return;
-
-out_release:
- pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
- r_xprt->rx_stats.mrs_orphaned++;
-
- spin_lock(&r_xprt->rx_buf.rb_mrlock);
- list_del(&mr->mr_all);
- spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
- frwr_op_release_mr(mr);
-}
-
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
@@ -276,6 +242,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth);
+ ia->ri_max_segs += 2; /* segments for head and tail buffers */
return 0;
}
@@ -384,7 +351,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr = NULL;
do {
if (mr)
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
return ERR_PTR(-EAGAIN);
@@ -417,7 +384,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
- trace_xprtrdma_dma_map(mr);
+ trace_xprtrdma_mr_map(mr);
ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
@@ -451,7 +418,7 @@ out_dmamap_err:
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frwr->fr_mr, n, mr->mr_nents);
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
return ERR_PTR(-EIO);
}
@@ -499,7 +466,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
- trace_xprtrdma_remoteinv(mr);
+ trace_xprtrdma_mr_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
@@ -536,7 +503,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
mr->frwr.fr_state = FRWR_IS_INVALID;
frwr = &mr->frwr;
- trace_xprtrdma_localinv(mr);
+ trace_xprtrdma_mr_localinv(mr);
frwr->fr_cqe.done = frwr_wc_localinv;
last = &frwr->fr_invwr;
@@ -570,7 +537,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
if (bad_wr != first)
wait_for_completion(&frwr->fr_linv_done);
if (rc)
- goto reset_mrs;
+ goto out_release;
/* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MR list.
@@ -582,22 +549,21 @@ unmap:
}
return;
-reset_mrs:
+out_release:
pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
- /* Find and reset the MRs in the LOCAL_INV WRs that did not
+ /* Unmap and release the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
-
- __frwr_mr_reset(ia, mr);
-
bad_wr = bad_wr->next;
+
+ list_del(&mr->mr_list);
+ frwr_op_release_mr(mr);
}
- goto unmap;
}
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
@@ -605,7 +571,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_send = frwr_op_send,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
- .ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
.ro_init_mr = frwr_op_init_mr,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c8ae983c6cc0..9f53e0240035 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,7 +71,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Read list size */
- maxsegs += 2; /* segment for head and tail buffers */
size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
@@ -97,7 +96,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Write list size */
- maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */
size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
@@ -805,7 +803,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
struct rpcrdma_mr *mr;
mr = rpcrdma_mr_pop(&req->rl_registered);
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
}
/* This implementation supports the following combinations
@@ -866,7 +864,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
out_err:
switch (ret) {
case -EAGAIN:
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ xprt_wait_for_buffer_space(rqst->rq_xprt);
break;
case -ENOBUFS:
break;
@@ -1216,7 +1214,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpc_rqst *rqst = rep->rr_rqst;
- unsigned long cwnd;
int status;
xprt->reestablish_timeout = 0;
@@ -1238,15 +1235,10 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
goto out_badheader;
out:
- spin_lock(&xprt->recv_lock);
- cwnd = xprt->cwnd;
- xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(rqst->rq_task);
-
+ spin_lock(&xprt->queue_lock);
xprt_complete_rqst(rqst->rq_task, status);
xprt_unpin_rqst(rqst);
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
return;
/* If the incoming reply terminated a pending RPC, the next
@@ -1345,19 +1337,23 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst)
goto out_norqst;
xprt_pin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
- buf->rb_credits = credits;
-
- spin_unlock(&xprt->recv_lock);
+ if (buf->rb_credits != credits) {
+ spin_lock_bh(&xprt->transport_lock);
+ buf->rb_credits = credits;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ spin_unlock_bh(&xprt->transport_lock);
+ }
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
@@ -1378,7 +1374,7 @@ out_badversion:
* is corrupt.
*/
out_norqst:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
trace_xprtrdma_reply_rqst(rep);
goto repost;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a68180090554..f3c147d70286 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -5,8 +5,6 @@
* Support for backward direction RPCs on RPC/RDMA (server-side).
*/
-#include <linux/module.h>
-
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
@@ -32,7 +30,6 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct kvec *dst, *src = &rcvbuf->head[0];
struct rpc_rqst *req;
- unsigned long cwnd;
u32 credits;
size_t len;
__be32 xid;
@@ -56,7 +53,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
if (src->iov_len < 24)
goto out_shortreply;
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
req = xprt_lookup_rqst(xprt, xid);
if (!req)
goto out_notfound;
@@ -66,6 +63,8 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
if (dst->iov_len < len)
goto out_unlock;
memcpy(dst->iov_base, p, len);
+ xprt_pin_rqst(req);
+ spin_unlock(&xprt->queue_lock);
credits = be32_to_cpup(rdma_resp + 2);
if (credits == 0)
@@ -74,19 +73,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
credits = r_xprt->rx_buf.rb_bc_max_requests;
spin_lock_bh(&xprt->transport_lock);
- cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(req->rq_task);
spin_unlock_bh(&xprt->transport_lock);
-
+ spin_lock(&xprt->queue_lock);
ret = 0;
xprt_complete_rqst(req->rq_task, rcvbuf->len);
+ xprt_unpin_rqst(req);
rcvbuf->len = 0;
out_unlock:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
out:
return ret;
@@ -215,9 +212,8 @@ drop_connection:
* connection.
*/
static int
-xprt_rdma_bc_send_request(struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
- struct rpc_rqst *rqst = task->tk_rqstp;
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
struct svcxprt_rdma *rdma;
int ret;
@@ -225,12 +221,7 @@ xprt_rdma_bc_send_request(struct rpc_task *task)
dprintk("svcrdma: sending bc call with xid: %08x\n",
be32_to_cpu(rqst->rq_xid));
- if (!mutex_trylock(&sxprt->xpt_mutex)) {
- rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
- if (!mutex_trylock(&sxprt->xpt_mutex))
- return -EAGAIN;
- rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
- }
+ mutex_lock(&sxprt->xpt_mutex);
ret = -ENOTCONN;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
@@ -248,6 +239,7 @@ static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+ xprt->cwnd = RPC_CWNDSHIFT;
}
static void
@@ -256,7 +248,6 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt)
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
xprt_free(xprt);
- module_put(THIS_MODULE);
}
static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
@@ -328,20 +319,9 @@ xprt_setup_rdma_bc(struct xprt_create *args)
args->bc_xprt->xpt_bc_xprt = xprt;
xprt->bc_xprt = args->bc_xprt;
- if (!try_module_get(THIS_MODULE))
- goto out_fail;
-
/* Final put for backchannel xprt is in __svc_rdma_free */
xprt_get(xprt);
return xprt;
-
-out_fail:
- xprt_rdma_free_addresses(xprt);
- args->bc_xprt->xpt_bc_xprt = NULL;
- args->bc_xprt->xpt_bc_xps = NULL;
- xprt_put(xprt);
- xprt_free(xprt);
- return ERR_PTR(-EINVAL);
}
struct xprt_class xprt_rdma_bc = {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2848cafd4a17..2f7ec8912f49 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -475,10 +475,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
- /* transport hdr, head iovec, one page list entry, tail iovec */
- if (newxprt->sc_max_send_sges < 4) {
- pr_err("svcrdma: too few Send SGEs available (%d)\n",
+ /* Transport header, head iovec, tail iovec */
+ newxprt->sc_max_send_sges = 3;
+ /* Add one SGE per page list entry */
+ newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
+ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
+ pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
newxprt->sc_max_send_sges);
goto errout;
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 143ce2579ba9..ae2a83828953 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -225,69 +225,59 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
}
}
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
- schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
- struct rpcrdma_ep *ep =
- container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
- struct rpcrdma_xprt *r_xprt =
- container_of(ep, struct rpcrdma_xprt, rx_ep);
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
- spin_lock_bh(&xprt->transport_lock);
- if (ep->rep_connected > 0) {
- if (!xprt_test_and_set_connected(xprt))
- xprt_wake_pending_tasks(xprt, 0);
- } else {
- if (xprt_test_and_clear_connected(xprt))
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
- }
- spin_unlock_bh(&xprt->transport_lock);
-}
-
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- int rc = 0;
-
- xprt_clear_connected(xprt);
+ int rc;
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- xprt_wake_pending_tasks(xprt, rc);
-
xprt_clear_connecting(xprt);
+ if (r_xprt->rx_ep.rep_connected > 0) {
+ if (!xprt_test_and_set_connected(xprt)) {
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
+ }
+ } else {
+ if (xprt_test_and_clear_connected(xprt))
+ xprt_wake_pending_tasks(xprt, rc);
+ }
}
+/**
+ * xprt_rdma_inject_disconnect - inject a connection fault
+ * @xprt: transport context
+ *
+ * If @xprt is connected, disconnect it to simulate spurious connection
+ * loss.
+ */
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
{
- struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
- rx_xprt);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
-/*
- * xprt_rdma_destroy
+/**
+ * xprt_rdma_destroy - Full tear down of transport
+ * @xprt: doomed transport context
*
- * Destroy the xprt.
- * Free all memory associated with the object, including its own.
- * NOTE: none of the *destroy methods free memory for their top-level
- * objects, even though they may have allocated it (they do free
- * private memory). It's up to the caller to handle it. In this
- * case (RDMA transport), all structure memory is inlined with the
- * struct rpcrdma_xprt.
+ * Caller guarantees there will be no more calls to us with
+ * this @xprt.
*/
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
@@ -298,8 +288,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
- xprt_clear_connected(xprt);
-
rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ia_close(&r_xprt->rx_ia);
@@ -442,11 +430,12 @@ out1:
}
/**
- * xprt_rdma_close - Close down RDMA connection
- * @xprt: generic transport to be closed
+ * xprt_rdma_close - close a transport connection
+ * @xprt: transport context
*
- * Called during transport shutdown reconnect, or device
- * removal. Caller holds the transport's write lock.
+ * Called during transport shutdown, reconnect, or device removal.
+ * Caller holds @xprt's send lock to prevent activity on this
+ * transport while the connection is torn down.
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
@@ -468,6 +457,12 @@ xprt_rdma_close(struct rpc_xprt *xprt)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(ep, ia);
+
+ /* Prepare @xprt for the next connection by reinitializing
+ * its credit grant to one (see RFC 8166, Section 3.3.3).
+ */
+ r_xprt->rx_buf.rb_credits = 1;
+ xprt->cwnd = RPC_CWNDSHIFT;
}
/**
@@ -519,6 +514,12 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
xprt_force_disconnect(xprt);
}
+/**
+ * xprt_rdma_connect - try to establish a transport connection
+ * @xprt: transport state
+ * @task: RPC scheduler context
+ *
+ */
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -638,13 +639,6 @@ rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
* 0: Success; rq_buffer points to RPC buffer to use
* ENOMEM: Out of memory, call again later
* EIO: A permanent error occurred, do not retry
- *
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual
- * send/recv sequence.
- *
- * xprt_rdma_allocate provides buffers that are already mapped for
- * DMA, and a local DMA lkey is provided for each.
*/
static int
xprt_rdma_allocate(struct rpc_task *task)
@@ -693,7 +687,7 @@ xprt_rdma_free(struct rpc_task *task)
/**
* xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
*
* Caller holds the transport's write lock.
*
@@ -706,9 +700,8 @@ xprt_rdma_free(struct rpc_task *task)
* sent. Do not try to send this message again.
*/
static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
{
- struct rpc_rqst *rqst = task->tk_rqstp;
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -722,6 +715,9 @@ xprt_rdma_send_request(struct rpc_task *task)
if (!xprt_connected(xprt))
goto drop_connection;
+ if (!xprt_request_get_cong(xprt, rqst))
+ return -EBADSLT;
+
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
@@ -741,7 +737,7 @@ xprt_rdma_send_request(struct rpc_task *task)
/* An RPC with no reply will throw off credit accounting,
* so drop the connection to reset the credit grant.
*/
- if (!rpc_reply_expected(task))
+ if (!rpc_reply_expected(rqst->rq_task))
goto drop_connection;
return 0;
@@ -766,7 +762,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
0, /* need a local port? */
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
@@ -786,7 +782,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
- r_xprt->rx_stats.mrs_recovered,
+ r_xprt->rx_stats.mrs_recycled,
r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated,
r_xprt->rx_stats.local_inv_needed,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 956a5ea47b58..3ddba94c939f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -108,20 +108,48 @@ rpcrdma_destroy_wq(void)
}
}
+/**
+ * rpcrdma_disconnect_worker - Force a disconnect
+ * @work: endpoint to be disconnected
+ *
+ * Provider callbacks can possibly run in an IRQ context. This function
+ * is invoked in a worker thread to guarantee that disconnect wake-up
+ * calls are always done in process context.
+ */
+static void
+rpcrdma_disconnect_worker(struct work_struct *work)
+{
+ struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
+ rep_disconnect_worker.work);
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ep, struct rpcrdma_xprt, rx_ep);
+
+ xprt_force_disconnect(&r_xprt->rx_xprt);
+}
+
+/**
+ * rpcrdma_qp_event_handler - Handle one QP event (error notification)
+ * @event: details of the event
+ * @context: ep that owns QP where event occurred
+ *
+ * Called from the RDMA provider (device driver) possibly in an interrupt
+ * context.
+ */
static void
-rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+rpcrdma_qp_event_handler(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
rx_ep);
- trace_xprtrdma_qp_error(r_xprt, event);
- pr_err("rpcrdma: %s on device %s ep %p\n",
- ib_event_msg(event->event), event->device->name, context);
+ trace_xprtrdma_qp_event(r_xprt, event);
+ pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
+ ib_event_msg(event->event), event->device->name,
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
if (ep->rep_connected == 1) {
ep->rep_connected = -EIO;
- rpcrdma_conn_func(ep);
+ schedule_delayed_work(&ep->rep_disconnect_worker, 0);
wake_up_all(&ep->rep_connect_wait);
}
}
@@ -219,38 +247,48 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
rpcrdma_set_max_header_sizes(r_xprt);
}
+/**
+ * rpcrdma_cm_event_handler - Handle RDMA CM events
+ * @id: rdma_cm_id on which an event has occurred
+ * @event: details of the event
+ *
+ * Called with @id's mutex held. Returns 1 if caller should
+ * destroy @id, otherwise 0.
+ */
static int
-rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
- struct rpcrdma_xprt *xprt = id->context;
- struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct rpcrdma_ep *ep = &xprt->rx_ep;
- int connstate = 0;
+ struct rpcrdma_xprt *r_xprt = id->context;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+
+ might_sleep();
- trace_xprtrdma_conn_upcall(xprt, event);
+ trace_xprtrdma_cm_event(r_xprt, event);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
ia->ri_async_rc = 0;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EPROTO;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
- xprt_force_disconnect(&xprt->rx_xprt);
+ xprt_force_disconnect(xprt);
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
@@ -258,41 +296,40 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
- ++xprt->rx_xprt.connect_cookie;
- connstate = 1;
- rpcrdma_update_connect_private(xprt, &event->param.conn);
- goto connected;
+ ++xprt->connect_cookie;
+ ep->rep_connected = 1;
+ rpcrdma_update_connect_private(r_xprt, &event->param.conn);
+ wake_up_all(&ep->rep_connect_wait);
+ break;
case RDMA_CM_EVENT_CONNECT_ERROR:
- connstate = -ENOTCONN;
- goto connected;
+ ep->rep_connected = -ENOTCONN;
+ goto disconnected;
case RDMA_CM_EVENT_UNREACHABLE:
- connstate = -ENETUNREACH;
- goto connected;
+ ep->rep_connected = -ENETUNREACH;
+ goto disconnected;
case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
rdma_reject_msg(id, event->status));
- connstate = -ECONNREFUSED;
+ ep->rep_connected = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
- connstate = -EAGAIN;
- goto connected;
+ ep->rep_connected = -EAGAIN;
+ goto disconnected;
case RDMA_CM_EVENT_DISCONNECTED:
- ++xprt->rx_xprt.connect_cookie;
- connstate = -ECONNABORTED;
-connected:
- ep->rep_connected = connstate;
- rpcrdma_conn_func(ep);
+ ++xprt->connect_cookie;
+ ep->rep_connected = -ECONNABORTED;
+disconnected:
+ xprt_force_disconnect(xprt);
wake_up_all(&ep->rep_connect_wait);
- /*FALLTHROUGH*/
+ break;
default:
- dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
- __func__,
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
- ia->ri_device->name, ia->ri_ops->ro_displayname,
- ep, rdma_event_msg(event->event));
break;
}
+ dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
+ ia->ri_device->name, ia->ri_ops->ro_displayname,
+ rdma_event_msg(event->event));
return 0;
}
@@ -308,7 +345,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
- id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+ id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
xprt, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
@@ -519,7 +556,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
if (rc)
return rc;
- ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+ ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_sge = max_sge;
@@ -542,7 +579,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
cdata->max_requests >> 2);
ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
- INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
+ INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
+ rpcrdma_disconnect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
@@ -615,7 +653,7 @@ out1:
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- cancel_delayed_work_sync(&ep->rep_connect_worker);
+ cancel_delayed_work_sync(&ep->rep_disconnect_worker);
if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
@@ -728,6 +766,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
int rc;
retry:
@@ -754,6 +793,8 @@ retry:
}
ep->rep_connected = 0;
+ xprt_clear_connected(xprt);
+
rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -877,7 +918,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
}
- buf->rb_flags = 0;
return 0;
@@ -978,39 +1018,6 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
}
static void
-rpcrdma_mr_recovery_worker(struct work_struct *work)
-{
- struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
- rb_recovery_worker.work);
- struct rpcrdma_mr *mr;
-
- spin_lock(&buf->rb_recovery_lock);
- while (!list_empty(&buf->rb_stale_mrs)) {
- mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
- spin_unlock(&buf->rb_recovery_lock);
-
- trace_xprtrdma_recover_mr(mr);
- mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
-
- spin_lock(&buf->rb_recovery_lock);
- }
- spin_unlock(&buf->rb_recovery_lock);
-}
-
-void
-rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
-{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-
- spin_lock(&buf->rb_recovery_lock);
- rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
- spin_unlock(&buf->rb_recovery_lock);
-
- schedule_delayed_work(&buf->rb_recovery_worker, 0);
-}
-
-static void
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1019,7 +1026,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(free);
LIST_HEAD(all);
- for (count = 0; count < 3; count++) {
+ for (count = 0; count < ia->ri_max_segs; count++) {
struct rpcrdma_mr *mr;
int rc;
@@ -1138,18 +1145,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
+ buf->rb_flags = 0;
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
- spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
- INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
- INIT_DELAYED_WORK(&buf->rb_recovery_worker,
- rpcrdma_mr_recovery_worker);
rpcrdma_mrs_create(r_xprt);
@@ -1233,7 +1237,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker);
rpcrdma_sendctxs_destroy(buf);
@@ -1326,7 +1329,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- trace_xprtrdma_dma_unmap(mr);
+ trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
@@ -1518,9 +1521,11 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
struct ib_recv_wr *wr, *bad_wr;
int needed, count, rc;
+ rc = 0;
+ count = 0;
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (buf->rb_posted_receives > needed)
- return;
+ goto out;
needed -= buf->rb_posted_receives;
count = 0;
@@ -1556,7 +1561,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
--needed;
}
if (!count)
- return;
+ goto out;
rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
@@ -1570,5 +1575,6 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
}
}
buf->rb_posted_receives += count;
+out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2ca14f7c2d51..a13ccb643ce0 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -101,7 +101,7 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct delayed_work rep_connect_worker;
+ struct delayed_work rep_disconnect_worker;
};
/* Pre-allocate extra Work Requests for handling backward receives
@@ -280,6 +280,7 @@ struct rpcrdma_mr {
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
+ struct work_struct mr_recycle;
struct list_head mr_all;
};
@@ -411,9 +412,6 @@ struct rpcrdma_buffer {
u32 rb_bc_max_requests;
- spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */
- struct list_head rb_stale_mrs;
- struct delayed_work rb_recovery_worker;
struct delayed_work rb_refresh_worker;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -452,7 +450,7 @@ struct rpcrdma_stats {
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
- unsigned long mrs_recovered;
+ unsigned long mrs_recycled;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
unsigned long empty_sendctx_q;
@@ -481,7 +479,6 @@ struct rpcrdma_memreg_ops {
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
@@ -559,7 +556,6 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-void rpcrdma_conn_func(struct rpcrdma_ep *ep);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -578,7 +574,12 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
+static inline void
+rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
+{
+ schedule_work(&mr->mr_recycle);
+}
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
@@ -652,7 +653,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
-void rpcrdma_connect_worker(struct work_struct *work);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6b7539c0466e..1b51e04d3566 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,13 +47,13 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
#include <trace/events/sunrpc.h>
#include "sunrpc.h"
-#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
-
static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -129,7 +129,7 @@ static struct ctl_table xs_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &xprt_min_resvport_limit,
- .extra2 = &xprt_max_resvport
+ .extra2 = &xprt_max_resvport_limit
},
{
.procname = "max_resvport",
@@ -137,7 +137,7 @@ static struct ctl_table xs_tunables_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &xprt_min_resvport,
+ .extra1 = &xprt_min_resvport_limit,
.extra2 = &xprt_max_resvport_limit
},
{
@@ -325,6 +325,362 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
}
}
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
+{
+ size_t i,n;
+
+ if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ return want;
+ if (want > buf->page_len)
+ want = buf->page_len;
+ n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < n; i++) {
+ if (buf->pages[i])
+ continue;
+ buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+ if (!buf->pages[i]) {
+ buf->page_len = (i * PAGE_SIZE) - buf->page_base;
+ return buf->page_len;
+ }
+ }
+ return want;
+}
+
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
+{
+ ssize_t ret;
+ if (seek != 0)
+ iov_iter_advance(&msg->msg_iter, seek);
+ ret = sock_recvmsg(sock, msg, flags);
+ return ret > 0 ? ret + seek : ret;
+}
+
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+ struct kvec *kvec, size_t count, size_t seek)
+{
+ iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+ return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+ struct bio_vec *bvec, unsigned long nr, size_t count,
+ size_t seek)
+{
+ iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+ return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+ size_t count)
+{
+ struct kvec kvec = { 0 };
+ return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+}
+
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+ struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+ size_t want, seek_init = seek, offset = 0;
+ ssize_t ret;
+
+ if (seek < buf->head[0].iov_len) {
+ want = min_t(size_t, count, buf->head[0].iov_len);
+ ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ seek = 0;
+ } else {
+ seek -= buf->head[0].iov_len;
+ offset += buf->head[0].iov_len;
+ }
+ if (seek < buf->page_len) {
+ want = xs_alloc_sparse_pages(buf,
+ min_t(size_t, count - offset, buf->page_len),
+ GFP_NOWAIT);
+ ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+ xdr_buf_pagecount(buf),
+ want + buf->page_base,
+ seek + buf->page_base);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret - buf->page_base;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ seek = 0;
+ } else {
+ seek -= buf->page_len;
+ offset += buf->page_len;
+ }
+ if (seek < buf->tail[0].iov_len) {
+ want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ } else
+ offset += buf->tail[0].iov_len;
+ ret = -EMSGSIZE;
+ msg->msg_flags |= MSG_TRUNC;
+out:
+ *read = offset - seek_init;
+ return ret;
+eagain:
+ ret = -EAGAIN;
+ goto out;
+sock_err:
+ offset += seek;
+ goto out;
+}
+
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+ if (!transport->recv.copied) {
+ if (buf->head[0].iov_len >= transport->recv.offset)
+ memcpy(buf->head[0].iov_base,
+ &transport->recv.xid,
+ transport->recv.offset);
+ transport->recv.copied = transport->recv.offset;
+ }
+}
+
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+ return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+}
+
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+ int flags, struct rpc_rqst *req)
+{
+ struct xdr_buf *buf = &req->rq_private_buf;
+ size_t want, read;
+ ssize_t ret;
+
+ xs_read_header(transport, buf);
+
+ want = transport->recv.len - transport->recv.offset;
+ ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+ transport->recv.copied + want, transport->recv.copied,
+ &read);
+ transport->recv.offset += read;
+ transport->recv.copied += read;
+ if (transport->recv.offset == transport->recv.len) {
+ if (xs_read_stream_request_done(transport))
+ msg->msg_flags |= MSG_EOR;
+ return transport->recv.copied;
+ }
+
+ switch (ret) {
+ case -EMSGSIZE:
+ return transport->recv.copied;
+ case 0:
+ return -ESHUTDOWN;
+ default:
+ if (ret < 0)
+ return ret;
+ }
+ return -EAGAIN;
+}
+
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+ if (isfrag)
+ return sizeof(__be32);
+ return 3 * sizeof(__be32);
+}
+
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+ int flags, size_t want, size_t seek)
+{
+ struct kvec kvec = {
+ .iov_base = &transport->recv.fraghdr,
+ .iov_len = want,
+ };
+ return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct rpc_rqst *req;
+ ssize_t ret;
+
+ /* Look up and lock the request corresponding to the given XID */
+ req = xprt_lookup_bc_request(xprt, transport->recv.xid);
+ if (!req) {
+ printk(KERN_WARNING "Callback slot table overflowed\n");
+ return -ESHUTDOWN;
+ }
+
+ ret = xs_read_stream_request(transport, msg, flags, req);
+ if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ xprt_complete_bc_request(req, ret);
+
+ return ret;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct rpc_rqst *req;
+ ssize_t ret = 0;
+
+ /* Look up and lock the request corresponding to the given XID */
+ spin_lock(&xprt->queue_lock);
+ req = xprt_lookup_rqst(xprt, transport->recv.xid);
+ if (!req) {
+ msg->msg_flags |= MSG_TRUNC;
+ goto out;
+ }
+ xprt_pin_rqst(req);
+ spin_unlock(&xprt->queue_lock);
+
+ ret = xs_read_stream_request(transport, msg, flags, req);
+
+ spin_lock(&xprt->queue_lock);
+ if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ xprt_complete_rqst(req->rq_task, ret);
+ xprt_unpin_rqst(req);
+out:
+ spin_unlock(&xprt->queue_lock);
+ return ret;
+}
+
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+ struct msghdr msg = { 0 };
+ size_t want, read = 0;
+ ssize_t ret = 0;
+
+ if (transport->recv.len == 0) {
+ want = xs_read_stream_headersize(transport->recv.copied != 0);
+ ret = xs_read_stream_header(transport, &msg, flags, want,
+ transport->recv.offset);
+ if (ret <= 0)
+ goto out_err;
+ transport->recv.offset = ret;
+ if (ret != want) {
+ ret = -EAGAIN;
+ goto out_err;
+ }
+ transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+ RPC_FRAGMENT_SIZE_MASK;
+ transport->recv.offset -= sizeof(transport->recv.fraghdr);
+ read = ret;
+ }
+
+ switch (be32_to_cpu(transport->recv.calldir)) {
+ case RPC_CALL:
+ ret = xs_read_stream_call(transport, &msg, flags);
+ break;
+ case RPC_REPLY:
+ ret = xs_read_stream_reply(transport, &msg, flags);
+ }
+ if (msg.msg_flags & MSG_TRUNC) {
+ transport->recv.calldir = cpu_to_be32(-1);
+ transport->recv.copied = -1;
+ }
+ if (ret < 0)
+ goto out_err;
+ read += ret;
+ if (transport->recv.offset < transport->recv.len) {
+ ret = xs_read_discard(transport->sock, &msg, flags,
+ transport->recv.len - transport->recv.offset);
+ if (ret <= 0)
+ goto out_err;
+ transport->recv.offset += ret;
+ read += ret;
+ if (transport->recv.offset != transport->recv.len)
+ return -EAGAIN;
+ }
+ if (xs_read_stream_request_done(transport)) {
+ trace_xs_stream_read_request(transport);
+ transport->recv.copied = 0;
+ }
+ transport->recv.offset = 0;
+ transport->recv.len = 0;
+ return read;
+out_err:
+ switch (ret) {
+ case 0:
+ case -ESHUTDOWN:
+ xprt_force_disconnect(&transport->xprt);
+ return -ESHUTDOWN;
+ }
+ return ret;
+}
+
+static void xs_stream_data_receive(struct sock_xprt *transport)
+{
+ size_t read = 0;
+ ssize_t ret = 0;
+
+ mutex_lock(&transport->recv_mutex);
+ if (transport->sock == NULL)
+ goto out;
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ for (;;) {
+ ret = xs_read_stream(transport, MSG_DONTWAIT);
+ if (ret <= 0)
+ break;
+ read += ret;
+ cond_resched();
+ }
+out:
+ mutex_unlock(&transport->recv_mutex);
+ trace_xs_stream_read_data(&transport->xprt, ret, read);
+}
+
+static void xs_stream_data_receive_workfn(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, recv_worker);
+ xs_stream_data_receive(transport);
+}
+
+static void
+xs_stream_reset_connect(struct sock_xprt *transport)
+{
+ transport->recv.offset = 0;
+ transport->recv.len = 0;
+ transport->recv.copied = 0;
+ transport->xmit.offset = 0;
+ transport->xprt.stat.connect_count++;
+ transport->xprt.stat.connect_start = jiffies;
+}
+
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
@@ -440,28 +796,21 @@ out:
return err;
}
-static void xs_nospace_callback(struct rpc_task *task)
-{
- struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
-
- transport->inet->sk_write_pending--;
-}
-
/**
- * xs_nospace - place task on wait queue if transmit was incomplete
- * @task: task to put to sleep
+ * xs_nospace - handle transmit was incomplete
+ * @req: pointer to RPC request
*
*/
-static int xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sock *sk = transport->inet;
int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
- task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+ req->rq_task->tk_pid,
+ req->rq_slen - transport->xmit.offset,
req->rq_slen);
/* Protect against races with write_space */
@@ -471,7 +820,7 @@ static int xs_nospace(struct rpc_task *task)
if (xprt_connected(xprt)) {
/* wait for more buffer space */
sk->sk_write_pending++;
- xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;
@@ -491,6 +840,22 @@ static int xs_nospace(struct rpc_task *task)
return ret;
}
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
+{
+ req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+}
+
+/*
+ * Determine if the previous message in the stream was aborted before it
+ * could complete transmission.
+ */
+static bool
+xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
+{
+ return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
+}
+
/*
* Construct a stream transport record marker in @buf.
*/
@@ -503,7 +868,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
/**
* xs_local_send_request - write an RPC request to an AF_LOCAL socket
- * @task: RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
@@ -512,9 +877,8 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
* ENOTCONN: Caller needs to invoke connect logic then call again
* other: Some other error occured, the request was not sent
*/
-static int xs_local_send_request(struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
@@ -522,25 +886,34 @@ static int xs_local_send_request(struct rpc_task *task)
int status;
int sent = 0;
+ /* Close the stream if the previous transmission was incomplete */
+ if (xs_send_request_was_aborted(transport, req)) {
+ xs_close(xprt);
+ return -ENOTCONN;
+ }
+
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
req->rq_xtime = ktime_get();
- status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+ status = xs_sendpages(transport->sock, NULL, 0, xdr,
+ transport->xmit.offset,
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
- __func__, xdr->len - req->rq_bytes_sent, status);
+ __func__, xdr->len - transport->xmit.offset, status);
if (status == -EAGAIN && sock_writeable(transport->inet))
status = -ENOBUFS;
if (likely(sent > 0) || status == 0) {
- req->rq_bytes_sent += sent;
- req->rq_xmit_bytes_sent += sent;
+ transport->xmit.offset += sent;
+ req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
+ transport->xmit.offset = 0;
return 0;
}
status = -EAGAIN;
@@ -550,7 +923,7 @@ static int xs_local_send_request(struct rpc_task *task)
case -ENOBUFS:
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
@@ -566,7 +939,7 @@ static int xs_local_send_request(struct rpc_task *task)
/**
* xs_udp_send_request - write an RPC request to a UDP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
@@ -575,9 +948,8 @@ static int xs_local_send_request(struct rpc_task *task)
* ENOTCONN: Caller needs to invoke connect logic then call again
* other: Some other error occurred, the request was not sent
*/
-static int xs_udp_send_request(struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -590,12 +962,16 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
+
+ if (!xprt_request_get_cong(xprt, req))
+ return -EBADSLT;
+
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
- xdr, req->rq_bytes_sent, true, &sent);
+ xdr, 0, true, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
- xdr->len - req->rq_bytes_sent, status);
+ xdr->len, status);
/* firewall is blocking us, don't return -EAGAIN or we end up looping */
if (status == -EPERM)
@@ -619,7 +995,7 @@ process_status:
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -639,7 +1015,7 @@ process_status:
/**
* xs_tcp_send_request - write an RPC request to a TCP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
@@ -651,9 +1027,8 @@ process_status:
* XXX: In the case of soft timeouts, should we eventually give up
* if sendmsg is not able to make progress?
*/
-static int xs_tcp_send_request(struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -662,6 +1037,13 @@ static int xs_tcp_send_request(struct rpc_task *task)
int status;
int sent;
+ /* Close the stream if the previous transmission was incomplete */
+ if (xs_send_request_was_aborted(transport, req)) {
+ if (transport->sock != NULL)
+ kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ return -ENOTCONN;
+ }
+
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
@@ -671,7 +1053,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
* completes while the socket holds a reference to the pages,
* then we may end up resending corrupted data.
*/
- if (task->tk_flags & RPC_TASK_SENT)
+ if (req->rq_task->tk_flags & RPC_TASK_SENT)
zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
@@ -684,17 +1066,20 @@ static int xs_tcp_send_request(struct rpc_task *task)
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- req->rq_bytes_sent, zerocopy, &sent);
+ transport->xmit.offset,
+ zerocopy, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
- xdr->len - req->rq_bytes_sent, status);
+ xdr->len - transport->xmit.offset, status);
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
- req->rq_bytes_sent += sent;
- req->rq_xmit_bytes_sent += sent;
+ transport->xmit.offset += sent;
+ req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
+ transport->xmit.offset = 0;
return 0;
}
@@ -732,7 +1117,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
case -ECONNRESET:
case -ECONNREFUSED:
@@ -749,35 +1134,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
return status;
}
-/**
- * xs_tcp_release_xprt - clean up after a tcp transmission
- * @xprt: transport
- * @task: rpc task
- *
- * This cleans up if an error causes us to abort the transmission of a request.
- * In this case, the socket may need to be reset in order to avoid confusing
- * the server.
- */
-static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- struct rpc_rqst *req;
-
- if (task != xprt->snd_task)
- return;
- if (task == NULL)
- goto out_release;
- req = task->tk_rqstp;
- if (req == NULL)
- goto out_release;
- if (req->rq_bytes_sent == 0)
- goto out_release;
- if (req->rq_bytes_sent == req->rq_snd_buf.len)
- goto out_release;
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-out_release:
- xprt_release_xprt(xprt, task);
-}
-
static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
{
transport->old_data_ready = sk->sk_data_ready;
@@ -921,114 +1277,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
module_put(THIS_MODULE);
}
-static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
- struct xdr_skb_reader desc = {
- .skb = skb,
- .offset = sizeof(rpc_fraghdr),
- .count = skb->len - sizeof(rpc_fraghdr),
- };
-
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
-}
-
-/**
- * xs_local_data_read_skb
- * @xprt: transport
- * @sk: socket
- * @skb: skbuff
- *
- * Currently this assumes we can read the whole reply in a single gulp.
- */
-static void xs_local_data_read_skb(struct rpc_xprt *xprt,
- struct sock *sk,
- struct sk_buff *skb)
-{
- struct rpc_task *task;
- struct rpc_rqst *rovr;
- int repsize, copied;
- u32 _xid;
- __be32 *xp;
-
- repsize = skb->len - sizeof(rpc_fraghdr);
- if (repsize < 4) {
- dprintk("RPC: impossible RPC reply size %d\n", repsize);
- return;
- }
-
- /* Copy the XID from the skb... */
- xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
- if (xp == NULL)
- return;
-
- /* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->recv_lock);
- rovr = xprt_lookup_rqst(xprt, *xp);
- if (!rovr)
- goto out_unlock;
- xprt_pin_rqst(rovr);
- spin_unlock(&xprt->recv_lock);
- task = rovr->rq_task;
-
- copied = rovr->rq_private_buf.buflen;
- if (copied > repsize)
- copied = repsize;
-
- if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- dprintk("RPC: sk_buff copy failed\n");
- spin_lock(&xprt->recv_lock);
- goto out_unpin;
- }
-
- spin_lock(&xprt->recv_lock);
- xprt_complete_rqst(task, copied);
-out_unpin:
- xprt_unpin_rqst(rovr);
- out_unlock:
- spin_unlock(&xprt->recv_lock);
-}
-
-static void xs_local_data_receive(struct sock_xprt *transport)
-{
- struct sk_buff *skb;
- struct sock *sk;
- int err;
-
-restart:
- mutex_lock(&transport->recv_mutex);
- sk = transport->inet;
- if (sk == NULL)
- goto out;
- for (;;) {
- skb = skb_recv_datagram(sk, 0, 1, &err);
- if (skb != NULL) {
- xs_local_data_read_skb(&transport->xprt, sk, skb);
- skb_free_datagram(sk, skb);
- continue;
- }
- if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- break;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
- }
-out:
- mutex_unlock(&transport->recv_mutex);
-}
-
-static void xs_local_data_receive_workfn(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, recv_worker);
- xs_local_data_receive(transport);
-}
-
/**
* xs_udp_data_read_skb - receive callback for UDP sockets
* @xprt: transport
@@ -1058,13 +1306,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
xprt_pin_rqst(rovr);
xprt_update_rtt(rovr->rq_task);
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
task = rovr->rq_task;
if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1072,7 +1320,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
goto out_unpin;
}
@@ -1081,13 +1329,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
spin_unlock_bh(&xprt->transport_lock);
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
xprt_complete_rqst(task, copied);
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
out_unpin:
xprt_unpin_rqst(rovr);
out_unlock:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
}
static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1096,25 +1344,18 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
-restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
goto out;
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
- if (skb != NULL) {
- xs_udp_data_read_skb(&transport->xprt, sk, skb);
- consume_skb(skb);
- continue;
- }
- if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ if (skb == NULL)
break;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
+ xs_udp_data_read_skb(&transport->xprt, sk, skb);
+ consume_skb(skb);
+ cond_resched();
}
out:
mutex_unlock(&transport->recv_mutex);
@@ -1163,263 +1404,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
xprt_force_disconnect(xprt);
}
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- size_t len, used;
- char *p;
-
- p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
- len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
-
- transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
- if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
- transport->tcp_flags |= TCP_RCV_LAST_FRAG;
- else
- transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
- transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
-
- transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
- transport->tcp_offset = 0;
-
- /* Sanity check of the record length */
- if (unlikely(transport->tcp_reclen < 8)) {
- dprintk("RPC: invalid TCP record fragment length\n");
- xs_tcp_force_close(xprt);
- return;
- }
- dprintk("RPC: reading TCP record fragment of length %d\n",
- transport->tcp_reclen);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
- if (transport->tcp_offset == transport->tcp_reclen) {
- transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
- transport->tcp_offset = 0;
- if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- transport->tcp_flags |= TCP_RCV_COPY_XID;
- transport->tcp_copied = 0;
- }
- }
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
- size_t len, used;
- char *p;
-
- len = sizeof(transport->tcp_xid) - transport->tcp_offset;
- dprintk("RPC: reading XID (%zu bytes)\n", len);
- p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
- transport->tcp_flags &= ~TCP_RCV_COPY_XID;
- transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
- transport->tcp_copied = 4;
- dprintk("RPC: reading %s XID %08x\n",
- (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
- : "request with",
- ntohl(transport->tcp_xid));
- xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
- struct xdr_skb_reader *desc)
-{
- size_t len, used;
- u32 offset;
- char *p;
-
- /*
- * We want transport->tcp_offset to be 8 at the end of this routine
- * (4 bytes for the xid and 4 bytes for the call/reply flag).
- * When this function is called for the first time,
- * transport->tcp_offset is 4 (after having already read the xid).
- */
- offset = transport->tcp_offset - sizeof(transport->tcp_xid);
- len = sizeof(transport->tcp_calldir) - offset;
- dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len);
- p = ((char *) &transport->tcp_calldir) + offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
- transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
- /*
- * We don't yet have the XDR buffer, so we will write the calldir
- * out after we get the buffer from the 'struct rpc_rqst'
- */
- switch (ntohl(transport->tcp_calldir)) {
- case RPC_REPLY:
- transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
- transport->tcp_flags |= TCP_RCV_COPY_DATA;
- transport->tcp_flags |= TCP_RPC_REPLY;
- break;
- case RPC_CALL:
- transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
- transport->tcp_flags |= TCP_RCV_COPY_DATA;
- transport->tcp_flags &= ~TCP_RPC_REPLY;
- break;
- default:
- dprintk("RPC: invalid request message type\n");
- xs_tcp_force_close(&transport->xprt);
- }
- xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc,
- struct rpc_rqst *req)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct xdr_buf *rcvbuf;
- size_t len;
- ssize_t r;
-
- rcvbuf = &req->rq_private_buf;
-
- if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
- /*
- * Save the RPC direction in the XDR buffer
- */
- memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
- &transport->tcp_calldir,
- sizeof(transport->tcp_calldir));
- transport->tcp_copied += sizeof(transport->tcp_calldir);
- transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
- }
-
- len = desc->count;
- if (len > transport->tcp_reclen - transport->tcp_offset)
- desc->count = transport->tcp_reclen - transport->tcp_offset;
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
- desc, xdr_skb_read_bits);
-
- if (desc->count) {
- /* Error when copying to the receive buffer,
- * usually because we weren't able to allocate
- * additional buffer pages. All we can do now
- * is turn off TCP_RCV_COPY_DATA, so the request
- * will not receive any additional updates,
- * and time out.
- * Any remaining data from this record will
- * be discarded.
- */
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- dprintk("RPC: XID %08x truncated request\n",
- ntohl(transport->tcp_xid));
- dprintk("RPC: xprt = %p, tcp_copied = %lu, "
- "tcp_offset = %u, tcp_reclen = %u\n",
- xprt, transport->tcp_copied,
- transport->tcp_offset, transport->tcp_reclen);
- return;
- }
-
- transport->tcp_copied += r;
- transport->tcp_offset += r;
- desc->count = len - r;
-
- dprintk("RPC: XID %08x read %zd bytes\n",
- ntohl(transport->tcp_xid), r);
- dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
- "tcp_reclen = %u\n", xprt, transport->tcp_copied,
- transport->tcp_offset, transport->tcp_reclen);
-
- if (transport->tcp_copied == req->rq_private_buf.buflen)
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- else if (transport->tcp_offset == transport->tcp_reclen) {
- if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- }
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct rpc_rqst *req;
-
- dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
-
- /* Find and lock the request corresponding to this xid */
- spin_lock(&xprt->recv_lock);
- req = xprt_lookup_rqst(xprt, transport->tcp_xid);
- if (!req) {
- dprintk("RPC: XID %08x request not found!\n",
- ntohl(transport->tcp_xid));
- spin_unlock(&xprt->recv_lock);
- return -1;
- }
- xprt_pin_rqst(req);
- spin_unlock(&xprt->recv_lock);
-
- xs_tcp_read_common(xprt, desc, req);
-
- spin_lock(&xprt->recv_lock);
- if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
- xprt_complete_rqst(req->rq_task, transport->tcp_copied);
- xprt_unpin_rqst(req);
- spin_unlock(&xprt->recv_lock);
- return 0;
-}
-
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data. The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct rpc_rqst *req;
-
- /* Look up the request corresponding to the given XID */
- req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
- if (req == NULL) {
- printk(KERN_WARNING "Callback slot table overflowed\n");
- xprt_force_disconnect(xprt);
- return -1;
- }
-
- dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
- xs_tcp_read_common(xprt, desc, req);
-
- if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
- xprt_complete_bc_request(req, transport->tcp_copied);
-
- return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
-
- return (transport->tcp_flags & TCP_RPC_REPLY) ?
- xs_tcp_read_reply(xprt, desc) :
- xs_tcp_read_callback(xprt, desc);
-}
-
static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
{
int ret;
@@ -1435,145 +1420,8 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
return PAGE_SIZE;
}
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- return xs_tcp_read_reply(xprt, desc);
-}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/*
- * Read data off the transport. This can be either an RPC_CALL or an
- * RPC_REPLY. Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
-
- if (_xs_tcp_read_data(xprt, desc) == 0)
- xs_tcp_check_fraghdr(transport);
- else {
- /*
- * The transport_lock protects the request handling.
- * There's no need to hold it to update the tcp_flags.
- */
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- }
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
- size_t len;
-
- len = transport->tcp_reclen - transport->tcp_offset;
- if (len > desc->count)
- len = desc->count;
- desc->count -= len;
- desc->offset += len;
- transport->tcp_offset += len;
- dprintk("RPC: discarded %zu bytes\n", len);
- xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
- struct rpc_xprt *xprt = rd_desc->arg.data;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct xdr_skb_reader desc = {
- .skb = skb,
- .offset = offset,
- .count = len,
- };
- size_t ret;
-
- dprintk("RPC: xs_tcp_data_recv started\n");
- do {
- trace_xs_tcp_data_recv(transport);
- /* Read in a new fragment marker if necessary */
- /* Can we ever really expect to get completely empty fragments? */
- if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
- xs_tcp_read_fraghdr(xprt, &desc);
- continue;
- }
- /* Read in the xid if necessary */
- if (transport->tcp_flags & TCP_RCV_COPY_XID) {
- xs_tcp_read_xid(transport, &desc);
- continue;
- }
- /* Read in the call/reply flag */
- if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
- xs_tcp_read_calldir(transport, &desc);
- continue;
- }
- /* Read in the request data */
- if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
- xs_tcp_read_data(xprt, &desc);
- continue;
- }
- /* Skip over any trailing bytes on short reads */
- xs_tcp_read_discard(transport, &desc);
- } while (desc.count);
- ret = len - desc.count;
- if (ret < rd_desc->count)
- rd_desc->count -= ret;
- else
- rd_desc->count = 0;
- trace_xs_tcp_data_recv(transport);
- dprintk("RPC: xs_tcp_data_recv done\n");
- return ret;
-}
-
-static void xs_tcp_data_receive(struct sock_xprt *transport)
-{
- struct rpc_xprt *xprt = &transport->xprt;
- struct sock *sk;
- read_descriptor_t rd_desc = {
- .arg.data = xprt,
- };
- unsigned long total = 0;
- int read = 0;
-
-restart:
- mutex_lock(&transport->recv_mutex);
- sk = transport->inet;
- if (sk == NULL)
- goto out;
-
- /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (;;) {
- rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
- lock_sock(sk);
- read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (rd_desc.count != 0 || read < 0) {
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
- release_sock(sk);
- break;
- }
- release_sock(sk);
- total += read;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
- }
- if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- queue_work(xprtiod_workqueue, &transport->recv_worker);
-out:
- mutex_unlock(&transport->recv_mutex);
- trace_xs_tcp_data_ready(xprt, read, total);
-}
-
-static void xs_tcp_data_receive_workfn(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, recv_worker);
- xs_tcp_data_receive(transport);
-}
-
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
@@ -1600,17 +1448,13 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) {
-
- /* Reset TCP record info */
- transport->tcp_offset = 0;
- transport->tcp_reclen = 0;
- transport->tcp_copied = 0;
- transport->tcp_flags =
- TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
xprt->connect_cookie++;
clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
xprt_clear_connecting(xprt);
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock(&xprt->transport_lock);
@@ -1675,7 +1519,8 @@ static void xs_write_space(struct sock *sk)
if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
goto out;
- xprt_write_space(xprt);
+ if (xprt_write_space(xprt))
+ sk->sk_write_pending--;
out:
rcu_read_unlock();
}
@@ -1773,11 +1618,17 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
}
-static unsigned short xs_get_random_port(void)
+static int xs_get_random_port(void)
{
- unsigned short range = xprt_max_resvport - xprt_min_resvport + 1;
- unsigned short rand = (unsigned short) prandom_u32() % range;
- return rand + xprt_min_resvport;
+ unsigned short min = xprt_min_resvport, max = xprt_max_resvport;
+ unsigned short range;
+ unsigned short rand;
+
+ if (max < min)
+ return -EADDRINUSE;
+ range = max - min + 1;
+ rand = (unsigned short) prandom_u32() % range;
+ return rand + min;
}
/**
@@ -1833,9 +1684,9 @@ static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
transport->srcport = xs_sock_getport(sock);
}
-static unsigned short xs_get_srcport(struct sock_xprt *transport)
+static int xs_get_srcport(struct sock_xprt *transport)
{
- unsigned short port = transport->srcport;
+ int port = transport->srcport;
if (port == 0 && transport->xprt.resvport)
port = xs_get_random_port();
@@ -1856,7 +1707,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
{
struct sockaddr_storage myaddr;
int err, nloop = 0;
- unsigned short port = xs_get_srcport(transport);
+ int port = xs_get_srcport(transport);
unsigned short last;
/*
@@ -1874,8 +1725,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
* transport->xprt.resvport == 1) xs_get_srcport above will
* ensure that port is non-zero and we will bind as needed.
*/
- if (port == 0)
- return 0;
+ if (port <= 0)
+ return port;
memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
@@ -2028,9 +1879,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
write_unlock_bh(&sk->sk_callback_lock);
}
- /* Tell the socket layer to start connecting... */
- xprt->stat.connect_count++;
- xprt->stat.connect_start = jiffies;
+ xs_stream_reset_connect(transport);
+
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
@@ -2062,6 +1912,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
case 0:
dprintk("RPC: xprt %p connected to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
xprt_set_connected(xprt);
case -ENOBUFS:
break;
@@ -2386,9 +2239,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_set_memalloc(xprt);
+ /* Reset TCP record info */
+ xs_stream_reset_connect(transport);
+
/* Tell the socket layer to start connecting... */
- xprt->stat.connect_count++;
- xprt->stat.connect_start = jiffies;
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
switch (ret) {
@@ -2561,7 +2415,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
"%llu %llu %lu %llu %llu\n",
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
@@ -2616,7 +2470,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
transport->srcport,
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
@@ -2704,9 +2558,8 @@ static int bc_sendto(struct rpc_rqst *req)
/*
* The send routine. Borrows from svc_send
*/
-static int bc_send_request(struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct svc_xprt *xprt;
int len;
@@ -2720,12 +2573,7 @@ static int bc_send_request(struct rpc_task *task)
* Grab the mutex to serialize data as the connection is shared
* with the fore channel
*/
- if (!mutex_trylock(&xprt->xpt_mutex)) {
- rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
- if (!mutex_trylock(&xprt->xpt_mutex))
- return -EAGAIN;
- rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
- }
+ mutex_lock(&xprt->xpt_mutex);
if (test_bit(XPT_DEAD, &xprt->xpt_flags))
len = -ENOTCONN;
else
@@ -2761,7 +2609,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
- .release_xprt = xs_tcp_release_xprt,
+ .release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = xs_local_rpcbind,
@@ -2769,6 +2617,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.connect = xs_local_connect,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
+ .prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_close,
@@ -2803,14 +2652,15 @@ static const struct rpc_xprt_ops xs_udp_ops = {
static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
- .release_xprt = xs_tcp_release_xprt,
- .alloc_slot = xprt_lock_and_alloc_slot,
+ .release_xprt = xprt_release_xprt,
+ .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
+ .prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_tcp_shutdown,
@@ -2952,9 +2802,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout;
- INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_dummy_setup_socket);
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
switch (sun->sun_family) {
case AF_LOCAL:
@@ -3106,7 +2955,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
- INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
switch (addr->sa_family) {
@@ -3317,12 +3166,8 @@ static int param_set_uint_minmax(const char *val,
static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
- if (kp->arg == &xprt_min_resvport)
- return param_set_uint_minmax(val, kp,
- RPC_MIN_RESVPORT,
- xprt_max_resvport);
return param_set_uint_minmax(val, kp,
- xprt_min_resvport,
+ RPC_MIN_RESVPORT,
RPC_MAX_RESVPORT);
}
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 4a9ee2d83158..140270a13d54 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -8,7 +8,6 @@ config XFRM
config XFRM_OFFLOAD
bool
- depends on XFRM
config XFRM_ALGO
tristate
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 2ad33ce1ea17..eca8d84d99bf 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -6,7 +6,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/xfrm.h>