summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c18
-rw-r--r--net/802/stp.c4
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/9p/client.c178
-rw-r--r--net/9p/protocol.c5
-rw-r--r--net/9p/trans_virtio.c76
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/caif/caif_config_util.c13
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/caif/caif_socket.c45
-rw-r--r--net/caif/cfcnfg.c17
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/caif/cfdbgl.c14
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/ceph/Makefile22
-rw-r--r--net/ceph/buffer.c2
-rw-r--r--net/compat.c10
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/dst.c1
-rw-r--r--net/core/fib_rules.c21
-rw-r--r--net/core/filter.c87
-rw-r--r--net/core/iovec.c20
-rw-r--r--net/core/net-sysfs.c26
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/pktgen.c41
-rw-r--r--net/core/request_sock.c4
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/sysctl_net_core.c3
-rw-r--r--net/dccp/ccid.h34
-rw-r--r--net/dccp/ccids/ccid2.c23
-rw-r--r--net/dccp/ccids/ccid2.h5
-rw-r--r--net/dccp/ccids/ccid3.c12
-rw-r--r--net/dccp/dccp.h5
-rw-r--r--net/dccp/input.c3
-rw-r--r--net/dccp/output.c209
-rw-r--r--net/dccp/proto.c21
-rw-r--r--net/dccp/timer.c27
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/sysctl_net_decnet.c4
-rw-r--r--net/econet/af_econet.c99
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_hash.c54
-rw-r--r--net/ipv4/fib_lookup.h5
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/gre.c5
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_diag.c27
-rw-r--r--net/ipv4/inet_hashtables.c3
-rw-r--r--net/ipv4/inetpeer.c138
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c40
-rw-r--r--net/ipv4/proc.c9
-rw-r--r--net/ipv4/protocol.c8
-rw-r--r--net/ipv4/route.c75
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c42
-rw-r--r--net/ipv4/tunnel4.c29
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c72
-rw-r--r--net/ipv6/ip6_tunnel.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c7
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/protocol.c8
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/tunnel6.c24
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/irda/irttp.c30
-rw-r--r--net/iucv/iucv.c3
-rw-r--r--net/l2tp/l2tp_core.c53
-rw-r--r--net/l2tp/l2tp_core.h33
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/llc/af_llc.c5
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c6
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_socket.c19
-rw-r--r--net/netlink/af_netlink.c65
-rw-r--r--net/packet/af_packet.c7
-rw-r--r--net/rds/loop.c4
-rw-r--r--net/rds/message.c7
-rw-r--r--net/rds/rdma.c128
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/sched/cls_basic.c4
-rw-r--r--net/sched/cls_cgroup.c2
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/socket.c35
-rw-r--r--net/sunrpc/Kconfig19
-rw-r--r--net/sunrpc/auth.c4
-rw-r--r--net/sunrpc/auth_generic.c2
-rw-r--r--net/sunrpc/auth_gss/Makefile5
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c2
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c247
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_seal.c186
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c267
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_unseal.c127
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c51
-rw-r--r--net/sunrpc/cache.c288
-rw-r--r--net/sunrpc/clnt.c3
-rw-r--r--net/sunrpc/netns.h19
-rw-r--r--net/sunrpc/rpc_pipe.c19
-rw-r--r--net/sunrpc/rpcb_clnt.c60
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c43
-rw-r--r--net/sunrpc/sunrpc_syms.c58
-rw-r--r--net/sunrpc/svc.c3
-rw-r--r--net/sunrpc/svc_xprt.c59
-rw-r--r--net/sunrpc/svcauth_unix.c194
-rw-r--r--net/sunrpc/svcsock.c27
-rw-r--r--net/sunrpc/xdr.c61
-rw-r--r--net/sunrpc/xprt.c39
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c49
-rw-r--r--net/sunrpc/xprtrdma/transport.c25
-rw-r--r--net/sunrpc/xprtsock.c358
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/unix/af_unix.c51
-rw-r--r--net/unix/garbage.c9
-rw-r--r--net/x25/x25_facilities.c20
-rw-r--r--net/x25/x25_in.c2
-rw-r--r--net/x25/x25_link.c1
-rw-r--r--net/xfrm/xfrm_hash.c2
-rw-r--r--net/xfrm/xfrm_state.c2
149 files changed, 2315 insertions, 2367 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 941f2a324d3a..c1df2dad8c6b 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -346,8 +346,8 @@ int garp_request_join(const struct net_device *dev,
const struct garp_application *appl,
const void *data, u8 len, u8 type)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
struct garp_attr *attr;
spin_lock_bh(&app->lock);
@@ -366,8 +366,8 @@ void garp_request_leave(const struct net_device *dev,
const struct garp_application *appl,
const void *data, u8 len, u8 type)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
struct garp_attr *attr;
spin_lock_bh(&app->lock);
@@ -546,11 +546,11 @@ static int garp_init_port(struct net_device *dev)
static void garp_release_port(struct net_device *dev)
{
- struct garp_port *port = dev->garp_port;
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
unsigned int i;
for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
- if (port->applicants[i])
+ if (rtnl_dereference(port->applicants[i]))
return;
}
rcu_assign_pointer(dev->garp_port, NULL);
@@ -565,7 +565,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
ASSERT_RTNL();
- if (!dev->garp_port) {
+ if (!rtnl_dereference(dev->garp_port)) {
err = garp_init_port(dev);
if (err < 0)
goto err1;
@@ -601,8 +601,8 @@ EXPORT_SYMBOL_GPL(garp_init_applicant);
void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
{
- struct garp_port *port = dev->garp_port;
- struct garp_applicant *app = port->applicants[appl->type];
+ struct garp_port *port = rtnl_dereference(dev->garp_port);
+ struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
ASSERT_RTNL();
diff --git a/net/802/stp.c b/net/802/stp.c
index 53c8f77f0ccd..978c30b1b36b 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -21,8 +21,8 @@
#define GARP_ADDR_MAX 0x2F
#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN)
-static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
-static const struct stp_proto *stp_proto __read_mostly;
+static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto __rcu *stp_proto __read_mostly;
static struct llc_sap *sap __read_mostly;
static unsigned int sap_registered;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 05b867e43757..52077ca22072 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -112,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
ASSERT_RTNL();
- grp = real_dev->vlgrp;
+ grp = rtnl_dereference(real_dev->vlgrp);
BUG_ON(!grp);
/* Take it out of our own structures, but be sure to interlock with
@@ -177,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
struct vlan_group *grp, *ngrp = NULL;
int err;
- grp = real_dev->vlgrp;
+ grp = rtnl_dereference(real_dev->vlgrp);
if (!grp) {
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
@@ -385,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
}
- grp = dev->vlgrp;
+ grp = rtnl_dereference(dev->vlgrp);
if (!grp)
goto out;
diff --git a/net/9p/client.c b/net/9p/client.c
index 83bf0541d66f..a848bca9fbff 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
return err;
}
- if (type == P9_RERROR) {
+ if (type == P9_RERROR || type == P9_RLERROR) {
int ecode;
- char *ename;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
- &ename, &ecode);
- if (err) {
- P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
- err);
- return err;
- }
+ if (!p9_is_proto_dotl(c)) {
+ char *ename;
- if (p9_is_proto_dotu(c) ||
- p9_is_proto_dotl(c))
- err = -ecode;
+ err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
+ if (err)
+ goto out_err;
+
+ if (p9_is_proto_dotu(c))
+ err = -ecode;
+
+ if (!err || !IS_ERR_VALUE(err)) {
+ err = p9_errstr2errno(ename, strlen(ename));
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
- if (!err || !IS_ERR_VALUE(err))
- err = p9_errstr2errno(ename, strlen(ename));
+ kfree(ename);
+ }
+ } else {
+ err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = -ecode;
- P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ }
- kfree(ename);
} else
err = 0;
return err;
+
+out_err:
+ P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
+
+ return err;
}
/**
@@ -568,11 +579,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
va_start(ap, fmt);
err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
va_end(ap);
+ if (err)
+ goto reterr;
p9pdu_finalize(req->tc);
err = c->trans_mod->request(c, req);
if (err < 0) {
- c->status = Disconnected;
+ if (err != -ERESTARTSYS)
+ c->status = Disconnected;
goto reterr;
}
@@ -1151,12 +1165,44 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
}
EXPORT_SYMBOL(p9_client_link);
+int p9_client_fsync(struct p9_fid *fid, int datasync)
+{
+ int err;
+ struct p9_client *clnt;
+ struct p9_req_t *req;
+
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
+ fid->fid, datasync);
+ err = 0;
+ clnt = fid->clnt;
+
+ req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto error;
+ }
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
+
+ p9_free_req(clnt, req);
+
+error:
+ return err;
+}
+EXPORT_SYMBOL(p9_client_fsync);
+
int p9_client_clunk(struct p9_fid *fid)
{
int err;
struct p9_client *clnt;
struct p9_req_t *req;
+ if (!fid) {
+ P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n");
+ dump_stack();
+ return 0;
+ }
+
P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
err = 0;
clnt = fid->clnt;
@@ -1240,16 +1286,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
if (data) {
memmove(data, dataptr, count);
- }
-
- if (udata) {
+ } else {
err = copy_to_user(udata, dataptr, count);
if (err) {
err = -EFAULT;
goto free_and_error;
}
}
-
p9_free_req(clnt, req);
return count;
@@ -1761,3 +1804,96 @@ error:
}
EXPORT_SYMBOL(p9_client_mkdir_dotl);
+
+int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
+{
+ int err;
+ struct p9_client *clnt;
+ struct p9_req_t *req;
+
+ err = 0;
+ clnt = fid->clnt;
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
+ "start %lld length %lld proc_id %d client_id %s\n",
+ fid->fid, flock->type, flock->flags, flock->start,
+ flock->length, flock->proc_id, flock->client_id);
+
+ req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
+ flock->flags, flock->start, flock->length,
+ flock->proc_id, flock->client_id);
+
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+ if (err) {
+ p9pdu_dump(1, req->rc);
+ goto error;
+ }
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
+error:
+ p9_free_req(clnt, req);
+ return err;
+
+}
+EXPORT_SYMBOL(p9_client_lock_dotl);
+
+int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
+{
+ int err;
+ struct p9_client *clnt;
+ struct p9_req_t *req;
+
+ err = 0;
+ clnt = fid->clnt;
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
+ "length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
+ glock->start, glock->length, glock->proc_id, glock->client_id);
+
+ req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
+ glock->start, glock->length, glock->proc_id, glock->client_id);
+
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
+ &glock->start, &glock->length, &glock->proc_id,
+ &glock->client_id);
+ if (err) {
+ p9pdu_dump(1, req->rc);
+ goto error;
+ }
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
+ "proc_id %d client_id %s\n", glock->type, glock->start,
+ glock->length, glock->proc_id, glock->client_id);
+error:
+ p9_free_req(clnt, req);
+ return err;
+}
+EXPORT_SYMBOL(p9_client_getlock_dotl);
+
+int p9_client_readlink(struct p9_fid *fid, char **target)
+{
+ int err;
+ struct p9_client *clnt;
+ struct p9_req_t *req;
+
+ err = 0;
+ clnt = fid->clnt;
+ P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
+
+ req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+ if (err) {
+ p9pdu_dump(1, req->rc);
+ goto error;
+ }
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
+error:
+ p9_free_req(clnt, req);
+ return err;
+}
+EXPORT_SYMBOL(p9_client_readlink);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3acd3afb20c8..45c15f491401 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -122,9 +122,8 @@ static size_t
pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
{
size_t len = MIN(pdu->capacity - pdu->size, size);
- int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
- if (err)
- printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+ if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
+ len = 0;
pdu->size += len;
return size - len;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b88515936e4b..c8f3f72ab20e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -75,6 +75,8 @@ struct virtio_chan {
struct p9_client *client;
struct virtio_device *vdev;
struct virtqueue *vq;
+ int ring_bufs_avail;
+ wait_queue_head_t *vc_wq;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
@@ -134,16 +136,30 @@ static void req_done(struct virtqueue *vq)
struct p9_fcall *rc;
unsigned int len;
struct p9_req_t *req;
+ unsigned long flags;
P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
- while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
- P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
- P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
- req = p9_tag_lookup(chan->client, rc->tag);
- req->status = REQ_STATUS_RCVD;
- p9_client_cb(chan->client, req);
- }
+ do {
+ spin_lock_irqsave(&chan->lock, flags);
+ rc = virtqueue_get_buf(chan->vq, &len);
+
+ if (rc != NULL) {
+ if (!chan->ring_bufs_avail) {
+ chan->ring_bufs_avail = 1;
+ wake_up(chan->vc_wq);
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+ P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+ P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
+ rc->tag);
+ req = p9_tag_lookup(chan->client, rc->tag);
+ req->status = REQ_STATUS_RCVD;
+ p9_client_cb(chan->client, req);
+ } else {
+ spin_unlock_irqrestore(&chan->lock, flags);
+ }
+ } while (rc != NULL);
}
/**
@@ -199,23 +215,43 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
int in, out;
struct virtio_chan *chan = client->trans;
char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
+ unsigned long flags;
+ int err;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
+req_retry:
+ req->status = REQ_STATUS_SENT;
+
+ spin_lock_irqsave(&chan->lock, flags);
out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
req->tc->size);
in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
client->msize);
- req->status = REQ_STATUS_SENT;
-
- if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
- P9_DPRINTK(P9_DEBUG_TRANS,
- "9p debug: virtio rpc add_buf returned failure");
- return -EIO;
+ err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+ if (err < 0) {
+ if (err == -ENOSPC) {
+ chan->ring_bufs_avail = 0;
+ spin_unlock_irqrestore(&chan->lock, flags);
+ err = wait_event_interruptible(*chan->vc_wq,
+ chan->ring_bufs_avail);
+ if (err == -ERESTARTSYS)
+ return err;
+
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+ goto req_retry;
+ } else {
+ spin_unlock_irqrestore(&chan->lock, flags);
+ P9_DPRINTK(P9_DEBUG_TRANS,
+ "9p debug: "
+ "virtio rpc add_buf returned failure");
+ return -EIO;
+ }
}
virtqueue_kick(chan->vq);
+ spin_unlock_irqrestore(&chan->lock, flags);
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
return 0;
@@ -290,14 +326,23 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->tag_len = tag_len;
err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
if (err) {
- kfree(tag);
- goto out_free_vq;
+ goto out_free_tag;
}
+ chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+ if (!chan->vc_wq) {
+ err = -ENOMEM;
+ goto out_free_tag;
+ }
+ init_waitqueue_head(chan->vc_wq);
+ chan->ring_bufs_avail = 1;
+
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
return 0;
+out_free_tag:
+ kfree(tag);
out_free_vq:
vdev->config->del_vqs(vdev);
kfree(chan);
@@ -371,6 +416,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
kfree(chan->tag);
+ kfree(chan->vc_wq);
kfree(chan);
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 26eaebf4aaa9..bb86d2932394 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
ax25_cb *ax25;
int err = 0;
+ memset(fsa, 0, sizeof(fsa));
lock_sock(sk);
ax25 = ax25_sk(sk);
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
fsa->fsa_ax25.sax25_family = AF_AX25;
fsa->fsa_ax25.sax25_call = ax25->dest_addr;
- fsa->fsa_ax25.sax25_ndigis = 0;
if (ax25->digipeat != NULL) {
ndigi = ax25->digipeat->ndigi;
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3a..d522d8c1703e 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
{
struct dev_info *dev_info;
enum cfcnfg_phy_preference pref;
+ int res;
+
memset(l, 0, sizeof(*l));
- l->priority = s->priority;
+ /* In caif protocol low value is high priority */
+ l->priority = CAIF_PRIO_MAX - s->priority + 1;
- if (s->link_name[0] != '\0')
- l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+ if (s->ifindex != 0){
+ res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
+ if (res < 0)
+ return res;
+ l->phyid = res;
+ }
else {
switch (s->link_selector) {
case CAIF_LINK_HIGH_BANDW:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index b99369a055d1..a42a408306e4 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -307,6 +307,8 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
case NETDEV_UNREGISTER:
caifd = caif_get(dev);
+ if (caifd == NULL)
+ break;
netdev_info(dev, "unregister\n");
atomic_set(&caifd->state, what);
caif_device_destroy(dev);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2eca2dd0000f..1bf0cf503796 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -716,8 +716,7 @@ static int setsockopt(struct socket *sock,
{
struct sock *sk = sock->sk;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
- int prio, linksel;
- struct ifreq ifreq;
+ int linksel;
if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
return -ENOPROTOOPT;
@@ -735,33 +734,6 @@ static int setsockopt(struct socket *sock,
release_sock(&cf_sk->sk);
return 0;
- case SO_PRIORITY:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(int))
- return -EINVAL;
- if (copy_from_user(&prio, ov, sizeof(int)))
- return -EINVAL;
- lock_sock(&(cf_sk->sk));
- cf_sk->conn_req.priority = prio;
- release_sock(&cf_sk->sk);
- return 0;
-
- case SO_BINDTODEVICE:
- if (lvl != SOL_SOCKET)
- goto bad_sol;
- if (ol < sizeof(struct ifreq))
- return -EINVAL;
- if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
- return -EFAULT;
- lock_sock(&(cf_sk->sk));
- strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
- sizeof(cf_sk->conn_req.link_name));
- cf_sk->conn_req.link_name
- [sizeof(cf_sk->conn_req.link_name)-1] = 0;
- release_sock(&cf_sk->sk);
- return 0;
-
case CAIFSO_REQ_PARAM:
if (lvl != SOL_CAIF)
goto bad_sol;
@@ -880,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
sk->sk_state = CAIF_CONNECTING;
+ /* Check priority value comming from socket */
+ /* if priority value is out of range it will be ajusted */
+ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
+ cf_sk->conn_req.priority = CAIF_PRIO_MAX;
+ else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
+ cf_sk->conn_req.priority = CAIF_PRIO_MIN;
+ else
+ cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
+
+ /*ifindex = id of the interface.*/
+ cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
+
dbfs_atomic_inc(&cnt.num_connect_req);
cf_sk->layer.receive = caif_sktrecv_cb;
err = caif_connect_client(&cf_sk->conn_req,
@@ -905,6 +889,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
cf_sk->maxframe = mtu - (headroom + tailroom);
if (cf_sk->maxframe < 1) {
pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
+ err = -ENODEV;
goto out;
}
@@ -1142,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
set_rx_flow_on(cf_sk);
/* Set default options on configuration */
- cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+ cf_sk->sk.sk_priority= CAIF_PRIO_NORMAL;
cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
cf_sk->conn_req.protocol = protocol;
/* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 41adafd18914..21ede141018a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -173,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
return NULL;
}
-int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+
+int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
{
int i;
-
- /* Try to match with specified name */
- for (i = 0; i < MAX_PHY_LAYERS; i++) {
- if (cnfg->phy_layers[i].frm_layer != NULL
- && strcmp(cnfg->phy_layers[i].phy_layer->name,
- name) == 0)
- return cnfg->phy_layers[i].frm_layer->id;
- }
- return 0;
+ for (i = 0; i < MAX_PHY_LAYERS; i++)
+ if (cnfg->phy_layers[i].frm_layer != NULL &&
+ cnfg->phy_layers[i].ifindex == ifi)
+ return i;
+ return -ENODEV;
}
int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 08f267a109aa..3cd8f978e309 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -361,11 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
struct cfctrl_request_info *p, *tmp;
struct cfctrl *ctrl = container_obj(layr);
spin_lock(&ctrl->info_list_lock);
- pr_warn("enter\n");
list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
if (p->client_layer == adap_layer) {
- pr_warn("cancel req :%d\n", p->sequence_no);
+ pr_debug("cancel req :%d\n", p->sequence_no);
list_del(&p->list);
kfree(p);
}
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 496fda9ac66f..11a2af4c162a 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -12,6 +12,8 @@
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -38,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
{
+ struct cfsrvl *service = container_obj(layr);
+ struct caif_payload_info *info;
+ int ret;
+
+ if (!cfsrvl_ready(service, &ret))
+ return ret;
+
+ /* Add info for MUX-layer to route the packet out */
+ info = cfpkt_info(pkt);
+ info->channel_id = service->layer.id;
+ info->dev_info = &service->dev_info;
+
return layr->dn->transmit(layr->dn, pkt);
}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index bde8481e8d25..e2fb5fa75795 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -193,7 +193,7 @@ out:
static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
{
- caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
+ caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
/* Add info for MUX-layer to route the packet out. */
cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be20..6faa8256e10c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
struct list_head tx_ops;
unsigned long dropped_usr_msgs;
struct proc_dir_entry *bcm_proc_read;
- char procname [9]; /* pointer printed in ASCII with \0 */
+ char procname [20]; /* pointer printed in ASCII with \0 */
};
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb8035..5f19415ec9c0 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,9 +1,6 @@
#
# Makefile for CEPH filesystem.
#
-
-ifneq ($(KERNELRELEASE),)
-
obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
@@ -16,22 +13,3 @@ libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
ceph_fs.o ceph_strings.o ceph_hash.o \
pagevec.o
-else
-#Otherwise we were called directly from the command
-# line; invoke the kernel build system.
-
-KERNELDIR ?= /lib/modules/$(shell uname -r)/build
-PWD := $(shell pwd)
-
-default: all
-
-all:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
-
-modules_install:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
-
-clean:
- $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
-
-endif
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 53d8abfa25d5..bf3e6a13c215 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -19,7 +19,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (b->vec.iov_base) {
b->is_vmalloc = false;
} else {
- b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL);
+ b->vec.iov_base = __vmalloc(len, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/compat.c b/net/compat.c
index 63d260e81472..3649d5895361 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
compat_size_t len;
if (get_user(len, &uiov32->iov_len) ||
- get_user(buf, &uiov32->iov_base)) {
- tot_len = -EFAULT;
- break;
- }
+ get_user(buf, &uiov32->iov_base))
+ return -EFAULT;
+
+ if (len > INT_MAX - tot_len)
+ len = INT_MAX - tot_len;
+
tot_len += len;
kiov->iov_base = compat_ptr(buf);
kiov->iov_len = (__kernel_size_t) len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 78b5a89b0f40..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1685,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
- return ((features & NETIF_F_GEN_CSUM) ||
- ((features & NETIF_F_IP_CSUM) &&
+ return ((features & NETIF_F_NO_CSUM) ||
+ ((features & NETIF_F_V4_CSUM) &&
protocol == htons(ETH_P_IP)) ||
- ((features & NETIF_F_IPV6_CSUM) &&
+ ((features & NETIF_F_V6_CSUM) &&
protocol == htons(ETH_P_IPV6)) ||
((features & NETIF_F_FCOE_CRC) &&
protocol == htons(ETH_P_FCOE)));
@@ -1696,22 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
+ __be16 protocol = skb->protocol;
int features = dev->features;
- if (vlan_tx_tag_present(skb))
+ if (vlan_tx_tag_present(skb)) {
features &= dev->vlan_features;
-
- if (can_checksum_protocol(features, skb->protocol))
- return true;
-
- if (skb->protocol == htons(ETH_P_8021Q)) {
+ } else if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- if (can_checksum_protocol(dev->features & dev->vlan_features,
- veh->h_vlan_encapsulated_proto))
- return true;
+ protocol = veh->h_vlan_encapsulated_proto;
+ features &= dev->vlan_features;
}
- return false;
+ return can_checksum_protocol(features, protocol);
}
/**
@@ -2135,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
} else {
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
- if (queue_index < 0) {
+ if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
@@ -2213,7 +2209,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
static DEFINE_PER_CPU(int, xmit_recursion);
-#define RECURSION_LIMIT 3
+#define RECURSION_LIMIT 10
/**
* dev_queue_xmit - transmit a buffer
@@ -2413,7 +2409,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
#ifdef CONFIG_RPS
/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
/*
@@ -2425,7 +2421,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
struct netdev_rx_queue *rxqueue;
- struct rps_map *map = NULL;
+ struct rps_map *map;
struct rps_dev_flow_table *flow_table;
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
@@ -2444,15 +2440,15 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
} else
rxqueue = dev->_rx;
- if (rxqueue->rps_map) {
- map = rcu_dereference(rxqueue->rps_map);
- if (map && map->len == 1) {
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ if (map->len == 1) {
tcpu = map->cpus[0];
if (cpu_online(tcpu))
cpu = tcpu;
goto done;
}
- } else if (!rxqueue->rps_flow_table) {
+ } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
goto done;
}
@@ -5416,7 +5412,7 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
WARN_ON(rcu_dereference_raw(dev->ip_ptr));
- WARN_ON(dev->ip6_ptr);
+ WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
if (dev->destructor)
diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f1..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block dst_dev_notifier = {
.notifier_call = dst_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
};
void __init dst_init(void)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc3f253ba6c..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -351,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
- rule->ctarget = r;
+ RCU_INIT_POINTER(rule->ctarget, r);
break;
}
}
- if (rule->ctarget == NULL)
+ if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
unresolved = 1;
} else if (rule->action == FR_ACT_GOTO)
goto errout_free;
@@ -373,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
fib_rule_get(rule);
+ if (last)
+ list_add_rcu(&rule->list, &last->list);
+ else
+ list_add_rcu(&rule->list, &ops->rules_list);
+
if (ops->unresolved_rules) {
/*
* There are unresolved goto rules in the list, check if
@@ -381,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
- BUG_ON(r->ctarget != NULL);
+ BUG_ON(rtnl_dereference(r->ctarget) != NULL);
rcu_assign_pointer(r->ctarget, rule);
if (--ops->unresolved_rules == 0)
break;
@@ -395,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- if (last)
- list_add_rcu(&rule->list, &last->list);
- else
- list_add_rcu(&rule->list, &ops->rules_list);
-
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
*/
if (ops->nr_goto_rules > 0) {
list_for_each_entry(tmp, &ops->rules_list, list) {
- if (tmp->ctarget == rule) {
+ if (rtnl_dereference(tmp->ctarget) == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
}
@@ -545,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->action = rule->action;
frh->flags = rule->flags;
- if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+ if (rule->action == FR_ACT_GOTO &&
+ rcu_dereference_raw(rule->ctarget) == NULL)
frh->flags |= FIB_RULE_UNRESOLVED;
if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7adf50352918..ae21a0d3c4a2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock_bh();
filter = rcu_dereference_bh(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = sk_run_filter(skb, filter->insns,
- filter->len);
+ unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
rcu_read_unlock_bh();
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
*/
unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
{
- struct sock_filter *fentry; /* We walk down these */
void *ptr;
u32 A = 0; /* Accumulator */
u32 X = 0; /* Index Register */
u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
+ unsigned long memvalid = 0;
u32 tmp;
int k;
int pc;
+ BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
/*
* Process array of filter instructions.
*/
for (pc = 0; pc < flen; pc++) {
- fentry = &filter[pc];
+ const struct sock_filter *fentry = &filter[pc];
+ u32 f_k = fentry->k;
switch (fentry->code) {
case BPF_S_ALU_ADD_X:
A += X;
continue;
case BPF_S_ALU_ADD_K:
- A += fentry->k;
+ A += f_k;
continue;
case BPF_S_ALU_SUB_X:
A -= X;
continue;
case BPF_S_ALU_SUB_K:
- A -= fentry->k;
+ A -= f_k;
continue;
case BPF_S_ALU_MUL_X:
A *= X;
continue;
case BPF_S_ALU_MUL_K:
- A *= fentry->k;
+ A *= f_k;
continue;
case BPF_S_ALU_DIV_X:
if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A /= fentry->k;
+ A /= f_k;
continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
case BPF_S_ALU_AND_K:
- A &= fentry->k;
+ A &= f_k;
continue;
case BPF_S_ALU_OR_X:
A |= X;
continue;
case BPF_S_ALU_OR_K:
- A |= fentry->k;
+ A |= f_k;
continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
case BPF_S_ALU_LSH_K:
- A <<= fentry->k;
+ A <<= f_k;
continue;
case BPF_S_ALU_RSH_X:
A >>= X;
continue;
case BPF_S_ALU_RSH_K:
- A >>= fentry->k;
+ A >>= f_k;
continue;
case BPF_S_ALU_NEG:
A = -A;
continue;
case BPF_S_JMP_JA:
- pc += fentry->k;
+ pc += f_k;
continue;
case BPF_S_JMP_JGT_K:
- pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A > f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGE_K:
- pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A >= f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JEQ_K:
- pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A == f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JSET_K:
- pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+ pc += (A & f_k) ? fentry->jt : fentry->jf;
continue;
case BPF_S_JMP_JGT_X:
pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
case BPF_S_LD_W_ABS:
- k = fentry->k;
+ k = f_k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
}
break;
case BPF_S_LD_H_ABS:
- k = fentry->k;
+ k = f_k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
}
break;
case BPF_S_LD_B_ABS:
- k = fentry->k;
+ k = f_k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
X = skb->len;
continue;
case BPF_S_LD_W_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_w;
case BPF_S_LD_H_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_h;
case BPF_S_LD_B_IND:
- k = X + fentry->k;
+ k = X + f_k;
goto load_b;
case BPF_S_LDX_B_MSH:
- ptr = load_pointer(skb, fentry->k, 1, &tmp);
+ ptr = load_pointer(skb, f_k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
case BPF_S_LD_IMM:
- A = fentry->k;
+ A = f_k;
continue;
case BPF_S_LDX_IMM:
- X = fentry->k;
+ X = f_k;
continue;
case BPF_S_LD_MEM:
- A = mem[fentry->k];
+ A = (memvalid & (1UL << f_k)) ?
+ mem[f_k] : 0;
continue;
case BPF_S_LDX_MEM:
- X = mem[fentry->k];
+ X = (memvalid & (1UL << f_k)) ?
+ mem[f_k] : 0;
continue;
case BPF_S_MISC_TAX:
X = A;
@@ -276,14 +280,16 @@ load_b:
A = X;
continue;
case BPF_S_RET_K:
- return fentry->k;
+ return f_k;
case BPF_S_RET_A:
return A;
case BPF_S_ST:
- mem[fentry->k] = A;
+ memvalid |= 1UL << f_k;
+ mem[f_k] = A;
continue;
case BPF_S_STX:
- mem[fentry->k] = X;
+ memvalid |= 1UL << f_k;
+ mem[f_k] = X;
continue;
default:
WARN_ON(1);
@@ -583,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
EXPORT_SYMBOL(sk_chk_filter);
/**
- * sk_filter_rcu_release: Release a socket filter by rcu_head
+ * sk_filter_release_rcu - Release a socket filter by rcu_head
* @rcu: rcu_head that contains the sk_filter to free
*/
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
{
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
- sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
- unsigned int size = sk_filter_len(fp);
-
- atomic_sub(size, &sk->sk_omem_alloc);
- call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+ kfree(fp);
}
+EXPORT_SYMBOL(sk_filter_release_rcu);
/**
* sk_attach_filter - attach a socket filter
@@ -643,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
rcu_assign_pointer(sk->sk_filter, fp);
if (old_fp)
- sk_filter_delayed_uncharge(sk, old_fp);
+ sk_filter_uncharge(sk, old_fp);
return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -657,7 +656,7 @@ int sk_detach_filter(struct sock *sk)
sock_owned_by_user(sk));
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
- sk_filter_delayed_uncharge(sk, filter);
+ sk_filter_uncharge(sk, filter);
ret = 0;
}
return ret;
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 72aceb1fe4fa..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,10 +35,9 @@
* in any case.
*/
-long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
{
- int size, ct;
- long err;
+ int size, ct, err;
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
@@ -62,14 +61,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
err = 0;
for (ct = 0; ct < m->msg_iovlen; ct++) {
- err += iov[ct].iov_len;
- /*
- * Goal is not to verify user data, but to prevent returning
- * negative value, which is interpreted as errno.
- * Overflow is still possible, but it is harmless.
- */
- if (err < 0)
- return -EMSGSIZE;
+ size_t len = iov[ct].iov_len;
+
+ if (len > INT_MAX - err) {
+ len = INT_MAX - err;
+ iov[ct].iov_len = len;
+ }
+ err += len;
}
return err;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b143173e3eb2..7f902cad10f8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -598,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
spin_lock(&rps_map_lock);
- old_map = queue->rps_map;
+ old_map = rcu_dereference_protected(queue->rps_map,
+ lockdep_is_held(&rps_map_lock));
rcu_assign_pointer(queue->rps_map, map);
spin_unlock(&rps_map_lock);
@@ -677,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table = NULL;
spin_lock(&rps_dev_flow_lock);
- old_table = queue->rps_flow_table;
+ old_table = rcu_dereference_protected(queue->rps_flow_table,
+ lockdep_is_held(&rps_dev_flow_lock));
rcu_assign_pointer(queue->rps_flow_table, table);
spin_unlock(&rps_dev_flow_lock);
@@ -705,16 +707,26 @@ static void rx_queue_release(struct kobject *kobj)
{
struct netdev_rx_queue *queue = to_rx_queue(kobj);
struct netdev_rx_queue *first = queue->first;
+ struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
- if (queue->rps_map)
- call_rcu(&queue->rps_map->rcu, rps_map_release);
- if (queue->rps_flow_table)
- call_rcu(&queue->rps_flow_table->rcu,
- rps_dev_flow_table_release);
+ map = rcu_dereference_raw(queue->rps_map);
+ if (map) {
+ RCU_INIT_POINTER(queue->rps_map, NULL);
+ call_rcu(&map->rcu, rps_map_release);
+ }
+
+ flow_table = rcu_dereference_raw(queue->rps_flow_table);
+ if (flow_table) {
+ RCU_INIT_POINTER(queue->rps_flow_table, NULL);
+ call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+ }
if (atomic_dec_and_test(&first->count))
kfree(first);
+ else
+ memset(kobj, 0, sizeof(*kobj));
}
static struct kobj_type rx_queue_ktype = {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id == 0);
- ng = old_ng = net->gen;
+ old_ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&net_mutex));
+ ng = old_ng;
if (old_ng->len >= id)
goto assign;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2c0df0f95b3d..33bc3823ac6f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -771,10 +771,10 @@ done:
static unsigned long num_arg(const char __user * user_buffer,
unsigned long maxlen, unsigned long *num)
{
- int i = 0;
+ int i;
*num = 0;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -789,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
static int strn_len(const char __user * user_buffer, unsigned int maxlen)
{
- int i = 0;
+ int i;
- for (; i < maxlen; i++) {
+ for (i = 0; i < maxlen; i++) {
char c;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
@@ -846,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_dev *pkt_dev = seq->private;
- int i = 0, max, len;
+ int i, max, len;
char name[16], valstr[32];
unsigned long value = 0;
char *pg_result = NULL;
@@ -860,13 +860,13 @@ static ssize_t pktgen_if_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- tmp = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ tmp = count_trail_chars(user_buffer, max);
if (tmp < 0) {
pr_warning("illegal format\n");
return tmp;
}
- i += tmp;
+ i = tmp;
/* Read variable name */
@@ -887,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
if (debug) {
- char tb[count + 1];
- if (copy_from_user(tb, user_buffer, count))
+ size_t copy = min_t(size_t, count, 1023);
+ char tb[copy + 1];
+ if (copy_from_user(tb, user_buffer, copy))
return -EFAULT;
- tb[count] = 0;
+ tb[copy] = 0;
printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
(unsigned long)count, tb);
}
@@ -1764,7 +1765,7 @@ static ssize_t pktgen_thread_write(struct file *file,
{
struct seq_file *seq = file->private_data;
struct pktgen_thread *t = seq->private;
- int i = 0, max, len, ret;
+ int i, max, len, ret;
char name[40];
char *pg_result;
@@ -1773,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
return -EINVAL;
}
- max = count - i;
- len = count_trail_chars(&user_buffer[i], max);
+ max = count;
+ len = count_trail_chars(user_buffer, max);
if (len < 0)
return len;
- i += len;
+ i = len;
/* Read variable name */
@@ -1975,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
const char *ifname)
{
char b[IFNAMSIZ+5];
- int i = 0;
+ int i;
for (i = 0; ifname[i] != '@'; i++) {
if (i == IFNAMSIZ)
@@ -2519,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
{
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
- int i = 0;
- for (; i < pkt_dev->cflows; i++) {
+ int i;
+ for (i = 0; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
if (x) {
xfrm_state_put(x);
@@ -2611,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2975,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
- queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
skb = __netdev_alloc_skb(odev,
pkt_dev->cur_pkt_size + 64
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..fceeb37d7161 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
- lopt = __vmalloc(lopt_size,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ lopt = vzalloc(lopt_size);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbdd..841c287ef40a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
if (!ops)
return 0;
- size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
- nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
+ size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+ nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
if (ops->get_size)
/* IFLA_INFO_DATA + nested data */
- size += nlmsg_total_size(sizeof(struct nlattr)) +
+ size += nla_total_size(sizeof(struct nlattr)) +
ops->get_size(dev);
if (ops->get_xstats_size)
- size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */
+ /* IFLA_INFO_XSTATS */
+ size += nla_total_size(ops->get_xstats_size(dev));
return size;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 11db43632df8..fb6080111461 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1225,7 +1225,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
sock_reset_flag(newsk, SOCK_DONE);
skb_queue_head_init(&newsk->sk_error_queue);
- filter = newsk->sk_filter;
+ filter = rcu_dereference_protected(newsk->sk_filter, 1);
if (filter != NULL)
sk_filter_charge(newsk, filter);
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
struct proto *prot = sk->sk_prot;
int amt = sk_mem_pages(size);
- int allocated;
+ long allocated;
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
- allocated = atomic_add_return(amt, prot->memory_allocated);
+ allocated = atomic_long_add_return(amt, prot->memory_allocated);
/* Under limit. */
if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
- atomic_sub(amt, prot->memory_allocated);
+ atomic_long_sub(amt, prot->memory_allocated);
return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
- atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
prot->memory_allocated);
sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
if (prot->memory_pressure && *prot->memory_pressure &&
- (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
*prot->memory_pressure = 0;
}
EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
- seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
+ seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
proto->name,
proto->obj_size,
sock_prot_inuse_get(seq_file_net(seq), proto),
- proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+ proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
proto->max_header,
proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
mutex_lock(&sock_flow_mutex);
- orig_sock_table = rps_sock_flow_table;
+ orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
+ lockdep_is_held(&sock_flow_mutex));
size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 117fb093dcaf..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -134,13 +134,41 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
+/*
+ * Congestion control of queued data packets via CCID decision.
+ *
+ * The TX CCID performs its congestion-control by indicating whether and when a
+ * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
+ * The following modes are supported via the symbolic constants below:
+ * - timer-based pacing (CCID returns a delay value in milliseconds);
+ * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
+ */
+
+enum ccid_dequeueing_decision {
+ CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
+ CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
+ CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
+ CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
+ CCID_PACKET_ERR = 0xF0000, /* error condition */
+};
+
+static inline int ccid_packet_dequeue_eval(const int return_code)
+{
+ if (return_code < 0)
+ return CCID_PACKET_ERR;
+ if (return_code == 0)
+ return CCID_PACKET_SEND_AT_ONCE;
+ if (return_code <= CCID_PACKET_DELAY_MAX)
+ return CCID_PACKET_DELAY;
+ return return_code;
+}
+
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
- int rc = 0;
if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
- return rc;
+ return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
+ return CCID_PACKET_SEND_AT_ONCE;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index d850e291f87c..6576eae9e779 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -78,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if (hc->tx_pipe < hc->tx_cwnd)
- return 0;
-
- return 1; /* XXX CCID should dequeue when ready instead of polling */
+ if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
+ return CCID_PACKET_WILL_DEQUEUE_LATER;
+ return CCID_PACKET_SEND_AT_ONCE;
}
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -115,6 +112,7 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -129,8 +127,6 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
if (hc->tx_rto > DCCP_RTO_MAX)
hc->tx_rto = DCCP_RTO_MAX;
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
/* adjust pipe, cwnd etc */
hc->tx_ssthresh = hc->tx_cwnd / 2;
if (hc->tx_ssthresh < 2)
@@ -146,6 +142,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
hc->tx_rpseq = 0;
hc->tx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
+
+ /* if we were blocked before, we may now send cwnd=1 packet */
+ if (sender_was_blocked)
+ tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+ /* restart backed-off timer */
+ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
out:
bh_unlock_sock(sk);
sock_put(sk);
@@ -434,6 +436,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
u64 ackno, seqno;
struct ccid2_seq *seqp;
unsigned char *vector;
@@ -631,6 +634,10 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
sk_stop_timer(sk, &hc->tx_rtotimer);
else
sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
+
+ /* check if incoming Acks allow pending packets to be sent */
+ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
+ tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 9731c2dc1487..25cb6b216eda 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -81,6 +81,11 @@ struct ccid2_hc_tx_sock {
u64 tx_high_ack;
};
+static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
+{
+ return hc->tx_pipe >= hc->tx_cwnd;
+}
+
struct ccid2_hc_rx_sock {
int rx_data;
};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3060a60ed5ab..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -268,11 +268,11 @@ out:
sock_put(sk);
}
-/*
- * returns
- * > 0: delay (in msecs) that should pass before actually sending
- * = 0: can send immediately
- * < 0: error condition; do not send packet
+/**
+ * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
+ * @skb: next packet candidate to send on @sk
+ * This function uses the convention of ccid_packet_dequeue_eval() and
+ * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
*/
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
@@ -348,7 +348,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
/* set the nominal send time for the next following packet */
hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
- return 0;
+ return CCID_PACKET_SEND_AT_ONCE;
}
static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3eb264b60823..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
extern void dccp_send_sync(struct sock *sk, const u64 seq,
const enum dccp_pkt_type pkt_type);
-extern void dccp_write_xmit(struct sock *sk, int block);
-extern void dccp_write_space(struct sock *sk);
+extern void dccp_write_xmit(struct sock *sk);
+extern void dccp_write_space(struct sock *sk);
+extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
extern void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa1..e424a09e83f6 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -239,7 +239,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
dccp_update_gsr(sk, seqno);
if (dh->dccph_type != DCCP_PKT_SYNC &&
- (ackno != DCCP_PKT_WITHOUT_ACK_SEQ))
+ ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
+ after48(ackno, dp->dccps_gar))
dp->dccps_gar = ackno;
} else {
unsigned long now = jiffies;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index a988fe9ffcba..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,108 +209,150 @@ void dccp_write_space(struct sock *sk)
}
/**
- * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
+ * dccp_wait_for_ccid - Await CCID send permission
* @sk: socket to wait for
- * @skb: current skb to pass on for waiting
- * @delay: sleep timeout in milliseconds (> 0)
- * This function is called by default when the socket is closed, and
- * when a non-zero linger time is set on the socket. For consistency
+ * @delay: timeout in jiffies
+ * This is used by CCIDs which need to delay the send time in process context.
*/
-static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
+static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
{
- struct dccp_sock *dp = dccp_sk(sk);
DEFINE_WAIT(wait);
- unsigned long jiffdelay;
- int rc;
+ long remaining;
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ sk->sk_write_pending++;
+ release_sock(sk);
+
+ remaining = schedule_timeout(delay);
+
+ lock_sock(sk);
+ sk->sk_write_pending--;
+ finish_wait(sk_sleep(sk), &wait);
+
+ if (signal_pending(current) || sk->sk_err)
+ return -1;
+ return remaining;
+}
+
+/**
+ * dccp_xmit_packet - Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+ int err, len;
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
- do {
- dccp_pr_debug("delayed send by %d msec\n", delay);
- jiffdelay = msecs_to_jiffies(delay);
+ if (unlikely(skb == NULL))
+ return;
+ len = skb->len;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ if (sk->sk_state == DCCP_PARTOPEN) {
+ const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+ /*
+ * See 8.1.5 - Handshake Completion.
+ *
+ * For robustness we resend Confirm options until the client has
+ * entered OPEN. During the initial feature negotiation, the MPS
+ * is smaller than usual, reduced by the Change/Confirm options.
+ */
+ if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+ DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+ dccp_send_ack(sk);
+ dccp_feat_list_purge(&dp->dccps_featneg);
+ }
- sk->sk_write_pending++;
- release_sock(sk);
- schedule_timeout(jiffdelay);
- lock_sock(sk);
- sk->sk_write_pending--;
+ inet_csk_schedule_ack(sk);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+ inet_csk(sk)->icsk_rto,
+ DCCP_RTO_MAX);
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+ } else if (dccp_ack_pending(sk)) {
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+ } else {
+ DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+ }
+
+ err = dccp_transmit_skb(sk, skb);
+ if (err)
+ dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+ /*
+ * Register this one as sent even if an error occurred. To the remote
+ * end a local packet drop is indistinguishable from network loss, i.e.
+ * any local drop will eventually be reported via receiver feedback.
+ */
+ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+}
- if (sk->sk_err)
- goto do_error;
- if (signal_pending(current))
- goto do_interrupted;
+/**
+ * dccp_flush_write_queue - Drain queue at end of connection
+ * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
+ * happen that the TX queue is not empty at the end of a connection. We give the
+ * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
+ * returns with a non-empty write queue, it will be purged later.
+ */
+void dccp_flush_write_queue(struct sock *sk, long *time_budget)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct sk_buff *skb;
+ long delay, rc;
+ while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
- } while ((delay = rc) > 0);
-out:
- finish_wait(sk_sleep(sk), &wait);
- return rc;
-
-do_error:
- rc = -EPIPE;
- goto out;
-do_interrupted:
- rc = -EINTR;
- goto out;
+
+ switch (ccid_packet_dequeue_eval(rc)) {
+ case CCID_PACKET_WILL_DEQUEUE_LATER:
+ /*
+ * If the CCID determines when to send, the next sending
+ * time is unknown or the CCID may not even send again
+ * (e.g. remote host crashes or lost Ack packets).
+ */
+ DCCP_WARN("CCID did not manage to send all packets\n");
+ return;
+ case CCID_PACKET_DELAY:
+ delay = msecs_to_jiffies(rc);
+ if (delay > *time_budget)
+ return;
+ rc = dccp_wait_for_ccid(sk, delay);
+ if (rc < 0)
+ return;
+ *time_budget -= (delay - rc);
+ /* check again if we can send now */
+ break;
+ case CCID_PACKET_SEND_AT_ONCE:
+ dccp_xmit_packet(sk);
+ break;
+ case CCID_PACKET_ERR:
+ skb_dequeue(&sk->sk_write_queue);
+ kfree_skb(skb);
+ dccp_pr_debug("packet discarded due to err=%ld\n", rc);
+ }
+ }
}
-void dccp_write_xmit(struct sock *sk, int block)
+void dccp_write_xmit(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct sk_buff *skb;
while ((skb = skb_peek(&sk->sk_write_queue))) {
- int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
- if (err > 0) {
- if (!block) {
- sk_reset_timer(sk, &dp->dccps_xmit_timer,
- msecs_to_jiffies(err)+jiffies);
- break;
- } else
- err = dccp_wait_for_ccid(sk, skb, err);
- if (err && err != -EINTR)
- DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
- }
+ int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
- skb_dequeue(&sk->sk_write_queue);
- if (err == 0) {
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- const int len = skb->len;
-
- if (sk->sk_state == DCCP_PARTOPEN) {
- const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
- /*
- * See 8.1.5 - Handshake Completion.
- *
- * For robustness we resend Confirm options until the client has
- * entered OPEN. During the initial feature negotiation, the MPS
- * is smaller than usual, reduced by the Change/Confirm options.
- */
- if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
- DCCP_WARN("Payload too large (%d) for featneg.\n", len);
- dccp_send_ack(sk);
- dccp_feat_list_purge(&dp->dccps_featneg);
- }
-
- inet_csk_schedule_ack(sk);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- inet_csk(sk)->icsk_rto,
- DCCP_RTO_MAX);
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- } else if (dccp_ack_pending(sk))
- dcb->dccpd_type = DCCP_PKT_DATAACK;
- else
- dcb->dccpd_type = DCCP_PKT_DATA;
-
- err = dccp_transmit_skb(sk, skb);
- ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
- if (err)
- DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
- err);
- } else {
- dccp_pr_debug("packet discarded due to err=%d\n", err);
+ switch (ccid_packet_dequeue_eval(rc)) {
+ case CCID_PACKET_WILL_DEQUEUE_LATER:
+ return;
+ case CCID_PACKET_DELAY:
+ sk_reset_timer(sk, &dp->dccps_xmit_timer,
+ jiffies + msecs_to_jiffies(rc));
+ return;
+ case CCID_PACKET_SEND_AT_ONCE:
+ dccp_xmit_packet(sk);
+ break;
+ case CCID_PACKET_ERR:
+ skb_dequeue(&sk->sk_write_queue);
kfree_skb(skb);
+ dccp_pr_debug("packet discarded due to err=%d\n", rc);
}
}
}
@@ -622,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
if (active) {
- dccp_write_xmit(sk, 1);
dccp_skb_entail(sk, skb);
dccp_transmit_skb(sk, skb_clone(skb, prio));
/*
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7e5fc04eb6d1..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto out_discard;
skb_queue_tail(&sk->sk_write_queue, skb);
- dccp_write_xmit(sk,0);
+ /*
+ * The xmit_timer is set if the TX CCID is rate-based and will expire
+ * when congestion control permits to release further packets into the
+ * network. Window-based CCIDs do not use this timer.
+ */
+ if (!timer_pending(&dp->dccps_xmit_timer))
+ dccp_write_xmit(sk);
out_release:
release_sock(sk);
return rc ? : len;
@@ -951,9 +957,22 @@ void dccp_close(struct sock *sk, long timeout)
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
} else if (sk->sk_state != DCCP_CLOSED) {
+ /*
+ * Normal connection termination. May need to wait if there are
+ * still packets in the TX queue that are delayed by the CCID.
+ */
+ dccp_flush_write_queue(sk, &timeout);
dccp_terminate_connection(sk);
}
+ /*
+ * Flush write queue. This may be necessary in several cases:
+ * - we have been closed by the peer but still have application data;
+ * - abortive termination (unread data or zero linger time),
+ * - normal termination but queue could not be flushed within time limit
+ */
+ __skb_queue_purge(&sk->sk_write_queue);
+
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1a9aa05d4dc4..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
sock_put(sk);
}
-/* Transmit-delay timer: used by the CCIDs to delay actual send time */
-static void dccp_write_xmit_timer(unsigned long data)
+/**
+ * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
+ * See the comments above %ccid_dequeueing_decision for supported modes.
+ */
+static void dccp_write_xmitlet(unsigned long data)
{
struct sock *sk = (struct sock *)data;
- struct dccp_sock *dp = dccp_sk(sk);
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
- sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+ sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
else
- dccp_write_xmit(sk, 0);
+ dccp_write_xmit(sk);
bh_unlock_sock(sk);
- sock_put(sk);
}
-static void dccp_init_write_xmit_timer(struct sock *sk)
+static void dccp_write_xmit_timer(unsigned long data)
{
- struct dccp_sock *dp = dccp_sk(sk);
-
- setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
- (unsigned long)sk);
+ dccp_write_xmitlet(data);
+ sock_put((struct sock *)data);
}
void dccp_init_xmit_timers(struct sock *sk)
{
- dccp_init_write_xmit_timer(sk);
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
+ setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+ (unsigned long)sk);
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..6f97268ed85f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
static DEFINE_RWLOCK(dn_hash_lock);
static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
static struct hlist_head dn_wild_sk;
-static atomic_t decnet_memory_allocated;
+static atomic_long_t decnet_memory_allocated;
static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -1556,6 +1556,8 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
if (r_len > sizeof(struct linkinfo_dn))
r_len = sizeof(struct linkinfo_dn);
+ memset(&link, 0, sizeof(link));
+
switch(sock->state) {
case SS_CONNECTING:
link.idn_linkstate = LL_CONNECTING;
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e23288..28f8b5e5f73b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
/* Reasonable defaults, I hope, based on tcp's defaults */
-int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
.data = &sysctl_decnet_mem,
.maxlen = sizeof(sysctl_decnet_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "decnet_rmem",
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f8c1ae4b41f0..15dcc1a586b4 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <linux/stat.h>
@@ -276,12 +277,12 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
#endif
#ifdef CONFIG_ECONET_AUNUDP
struct msghdr udpmsg;
- struct iovec iov[msg->msg_iovlen+1];
+ struct iovec iov[2];
struct aunhdr ah;
struct sockaddr_in udpdest;
__kernel_size_t size;
- int i;
mm_segment_t oldfs;
+ char *userbuf;
#endif
/*
@@ -297,23 +298,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
mutex_lock(&econet_mutex);
- if (saddr == NULL) {
- struct econet_sock *eo = ec_sk(sk);
-
- addr.station = eo->station;
- addr.net = eo->net;
- port = eo->port;
- cb = eo->cb;
- } else {
- if (msg->msg_namelen < sizeof(struct sockaddr_ec)) {
- mutex_unlock(&econet_mutex);
- return -EINVAL;
- }
- addr.station = saddr->addr.station;
- addr.net = saddr->addr.net;
- port = saddr->port;
- cb = saddr->cb;
- }
+ if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
+ mutex_unlock(&econet_mutex);
+ return -EINVAL;
+ }
+ addr.station = saddr->addr.station;
+ addr.net = saddr->addr.net;
+ port = saddr->port;
+ cb = saddr->cb;
/* Look for a device with the right network number. */
dev = net2dev_map[addr.net];
@@ -328,17 +320,17 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
}
}
- if (len + 15 > dev->mtu) {
- mutex_unlock(&econet_mutex);
- return -EMSGSIZE;
- }
-
if (dev->type == ARPHRD_ECONET) {
/* Real hardware Econet. We're not worthy etc. */
#ifdef CONFIG_ECONET_NATIVE
unsigned short proto = 0;
int res;
+ if (len + 15 > dev->mtu) {
+ mutex_unlock(&econet_mutex);
+ return -EMSGSIZE;
+ }
+
dev_hold(dev);
skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
@@ -351,7 +343,6 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
eb = (struct ec_cb *)&skb->cb;
- /* BUG: saddr may be NULL */
eb->cookie = saddr->cookie;
eb->sec = *saddr;
eb->sent = ec_tx_done;
@@ -415,6 +406,11 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
return -ENETDOWN; /* No socket - can't send */
}
+ if (len > 32768) {
+ err = -E2BIG;
+ goto error;
+ }
+
/* Make up a UDP datagram and hand it off to some higher intellect. */
memset(&udpdest, 0, sizeof(udpdest));
@@ -446,36 +442,26 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
/* tack our header on the front of the iovec */
size = sizeof(struct aunhdr);
- /*
- * XXX: that is b0rken. We can't mix userland and kernel pointers
- * in iovec, since on a lot of platforms copy_from_user() will
- * *not* work with the kernel and userland ones at the same time,
- * regardless of what we do with set_fs(). And we are talking about
- * econet-over-ethernet here, so "it's only ARM anyway" doesn't
- * apply. Any suggestions on fixing that code? -- AV
- */
iov[0].iov_base = (void *)&ah;
iov[0].iov_len = size;
- for (i = 0; i < msg->msg_iovlen; i++) {
- void __user *base = msg->msg_iov[i].iov_base;
- size_t iov_len = msg->msg_iov[i].iov_len;
- /* Check it now since we switch to KERNEL_DS later. */
- if (!access_ok(VERIFY_READ, base, iov_len)) {
- mutex_unlock(&econet_mutex);
- return -EFAULT;
- }
- iov[i+1].iov_base = base;
- iov[i+1].iov_len = iov_len;
- size += iov_len;
+
+ userbuf = vmalloc(len);
+ if (userbuf == NULL) {
+ err = -ENOMEM;
+ goto error;
}
+ iov[1].iov_base = userbuf;
+ iov[1].iov_len = len;
+ err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
+ if (err)
+ goto error_free_buf;
+
/* Get a skbuff (no data, just holds our cb information) */
if ((skb = sock_alloc_send_skb(sk, 0,
msg->msg_flags & MSG_DONTWAIT,
- &err)) == NULL) {
- mutex_unlock(&econet_mutex);
- return err;
- }
+ &err)) == NULL)
+ goto error_free_buf;
eb = (struct ec_cb *)&skb->cb;
@@ -491,7 +477,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
udpmsg.msg_name = (void *)&udpdest;
udpmsg.msg_namelen = sizeof(udpdest);
udpmsg.msg_iov = &iov[0];
- udpmsg.msg_iovlen = msg->msg_iovlen + 1;
+ udpmsg.msg_iovlen = 2;
udpmsg.msg_control = NULL;
udpmsg.msg_controllen = 0;
udpmsg.msg_flags=0;
@@ -499,9 +485,13 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */
err = sock_sendmsg(udpsock, &udpmsg, size);
set_fs(oldfs);
+
+error_free_buf:
+ vfree(userbuf);
#else
err = -EPROTOTYPE;
#endif
+ error:
mutex_unlock(&econet_mutex);
return err;
@@ -671,6 +661,11 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
err = 0;
switch (cmd) {
case SIOCSIFADDR:
+ if (!capable(CAP_NET_ADMIN)) {
+ err = -EPERM;
+ break;
+ }
+
edev = dev->ec_ptr;
if (edev == NULL) {
/* Magic up a new one. */
@@ -856,9 +851,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
{
struct iphdr *ip = ip_hdr(skb);
unsigned char stn = ntohl(ip->saddr) & 0xff;
+ struct dst_entry *dst = skb_dst(skb);
+ struct ec_device *edev = NULL;
struct sock *sk = NULL;
struct sk_buff *newskb;
- struct ec_device *edev = skb->dev->ec_ptr;
+
+ if (dst)
+ edev = dst->dev->ec_ptr;
if (! edev)
goto bad;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 36e27c2107de..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1052,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
hlist_del(node);
fib_table_flush(tb);
- kfree(tb);
+ fib_free_table(tb);
}
}
kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 43e1c594ce8f..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -120,11 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
struct fib_node *f;
hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
- struct hlist_head __rcu *new_head;
+ struct hlist_head *new_head;
hlist_del_rcu(&f->fn_hash);
- new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ new_head = rcu_dereference_protected(fz->fz_hash, 1) +
+ fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, new_head);
}
}
@@ -179,8 +180,8 @@ static void fn_rehash_zone(struct fn_zone *fz)
memcpy(&nfz, fz, sizeof(nfz));
write_seqlock_bh(&fz->fz_lock);
- old_ht = fz->fz_hash;
- nfz.fz_hash = ht;
+ old_ht = rcu_dereference_protected(fz->fz_hash, 1);
+ RCU_INIT_POINTER(nfz.fz_hash, ht);
nfz.fz_hashmask = new_hashmask;
nfz.fz_divisor = new_divisor;
fn_rebuild_zone(&nfz, old_ht, old_divisor);
@@ -236,7 +237,7 @@ fn_new_zone(struct fn_hash *table, int z)
seqlock_init(&fz->fz_lock);
fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
fz->fz_hashmask = fz->fz_divisor - 1;
- fz->fz_hash = fz->fz_embedded_hash;
+ RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
fz->fz_order = z;
fz->fz_revorder = 32 - z;
fz->fz_mask = inet_make_mask(z);
@@ -272,7 +273,7 @@ int fib_table_lookup(struct fib_table *tb,
for (fz = rcu_dereference(t->fn_zone_list);
fz != NULL;
fz = rcu_dereference(fz->fz_next)) {
- struct hlist_head __rcu *head;
+ struct hlist_head *head;
struct hlist_node *node;
struct fib_node *f;
__be32 k;
@@ -282,7 +283,7 @@ int fib_table_lookup(struct fib_table *tb,
seq = read_seqbegin(&fz->fz_lock);
k = fz_key(flp->fl4_dst, fz);
- head = &fz->fz_hash[fn_hash(k, fz)];
+ head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
hlist_for_each_entry_rcu(f, node, head, fn_hash) {
if (f->fn_key != k)
continue;
@@ -311,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
struct fib_info *last_resort;
struct fn_hash *t = (struct fn_hash *)tb->tb_data;
struct fn_zone *fz = t->fn_zones[0];
+ struct hlist_head *head;
if (fz == NULL)
return;
@@ -320,7 +322,8 @@ void fib_table_select_default(struct fib_table *tb,
order = -1;
rcu_read_lock();
- hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
+ head = rcu_dereference(fz->fz_hash);
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
struct fib_alias *fa;
list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
@@ -374,7 +377,7 @@ out:
/* Insert node F to FZ. */
static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
hlist_add_head_rcu(&f->fn_hash, head);
}
@@ -382,7 +385,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
/* Return the node in FZ matching KEY. */
static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
{
- struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
struct hlist_node *node;
struct fib_node *f;
@@ -662,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
static int fn_flush_list(struct fn_zone *fz, int idx)
{
- struct hlist_head *head = &fz->fz_hash[idx];
+ struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
struct hlist_node *node, *n;
struct fib_node *f;
int found = 0;
@@ -713,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ struct fn_hash *table = (struct fn_hash *) tb->tb_data;
+ struct fn_zone *fz, *next;
+
+ next = table->fn_zone_list;
+ while (next != NULL) {
+ fz = next;
+ next = fz->fz_next;
+
+ if (fz->fz_hash != fz->fz_embedded_hash)
+ fz_hash_free(fz->fz_hash, fz->fz_divisor);
+
+ kfree(fz);
+ }
+
+ kfree(tb);
+}
static inline int
fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -761,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
struct fn_zone *fz)
{
int h, s_h;
+ struct hlist_head *head = rcu_dereference(fz->fz_hash);
- if (fz->fz_hash == NULL)
+ if (head == NULL)
return skb->len;
s_h = cb->args[3];
for (h = s_h; h < fz->fz_divisor; h++) {
- if (hlist_empty(&fz->fz_hash[h]))
+ if (hlist_empty(head + h))
continue;
- if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
+ if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
cb->args[3] = h;
return -1;
}
@@ -872,7 +894,7 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
if (!iter->zone->fz_nent)
continue;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
maxslot = iter->zone->fz_divisor;
for (iter->bucket = 0; iter->bucket < maxslot;
@@ -957,7 +979,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
goto out;
iter->bucket = 0;
- iter->hash_head = iter->zone->fz_hash;
+ iter->hash_head = rcu_dereference(iter->zone->fz_hash);
hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
list_for_each_entry(fa, &fn->fn_alias, fa_list) {
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index a29edf2219c8..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -47,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
{
- if (res->fi != NULL)
- fib_info_put(res->fi);
+ /* we used to play games with refcounts, but we now use RCU */
res->fi = fi;
- if (fi != NULL)
- atomic_inc(&fi->fib_clntref);
}
#endif /* _FIB_LOOKUP_H */
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b14450895102..0f280348e0fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -365,7 +365,7 @@ static struct tnode *tnode_alloc(size_t size)
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
else
- return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ return vzalloc(size);
}
static void __tnode_vfree(struct work_struct *arg)
@@ -1797,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
return found;
}
+void fib_free_table(struct fib_table *tb)
+{
+ kfree(tb);
+}
+
void fib_table_select_default(struct fib_table *tb,
const struct flowi *flp,
struct fib_result *res)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index caea6885fdbd..c6933f2ea310 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -22,7 +22,7 @@
#include <net/gre.h>
-static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
+static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
static DEFINE_SPINLOCK(gre_proto_lock);
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
@@ -51,7 +51,8 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
goto err_out;
spin_lock(&gre_proto_lock);
- if (gre_proto[version] != proto)
+ if (rcu_dereference_protected(gre_proto[version],
+ lockdep_is_held(&gre_proto_lock)) != proto)
goto err_out_unlock;
rcu_assign_pointer(gre_proto[version], NULL);
spin_unlock(&gre_proto_lock);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a3..e5d1a44bcbdf 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -569,6 +569,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
/* No need to clone since we're just using its address. */
rt2 = rt;
+ if (!fl.nl_u.ip4_u.saddr)
+ fl.nl_u.ip4_u.saddr = rt->rt_src;
+
err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0);
switch (err) {
case 0:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2306,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
(void) ip_mc_leave_src(sk, iml, in_dev);
- if (in_dev != NULL) {
+ if (in_dev != NULL)
ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
- in_dev_put(in_dev);
- }
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ba8042665849..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = sk->sk_userlocks;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- struct rtattr *bc = NULL;
+ const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
- bc = (struct rtattr *)(r + 1);
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(RTA_DATA(bc),
- RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc),
+ nla_len(bc), &entry))
continue;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1b344f30b463..3c0369a3a663 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -133,8 +133,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
}
}
}
- sk_add_bind_node(child, &tb->owners);
- inet_csk(child)->icsk_bind_hash = tb;
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
#define node_height(x) x->avl_height
#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
static const struct inet_peer peer_fake_node = {
- .avl_left = peer_avl_empty,
- .avl_right = peer_avl_empty,
+ .avl_left = peer_avl_empty_rcu,
+ .avl_right = peer_avl_empty_rcu,
.avl_height = 0
};
static struct {
- struct inet_peer *root;
+ struct inet_peer __rcu *root;
spinlock_t lock;
int total;
} peers = {
- .root = peer_avl_empty,
+ .root = peer_avl_empty_rcu,
.lock = __SPIN_LOCK_UNLOCKED(peers.lock),
.total = 0,
};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
*/
#define lookup(_daddr, _stack) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
\
stackptr = _stack; \
*stackptr++ = &peers.root; \
- for (u = peers.root; u != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(peers.root, \
+ lockdep_is_held(&peers.lock)); \
+ u != peer_avl_empty; ) { \
if (_daddr == u->v4daddr) \
break; \
if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
else \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
/* Called with local BH disabled and the pool lock held. */
#define lookup_rightempty(start) \
({ \
- struct inet_peer *u, **v; \
+ struct inet_peer *u; \
+ struct inet_peer __rcu **v; \
*stackptr++ = &start->avl_left; \
v = &start->avl_left; \
- for (u = *v; u->avl_right != peer_avl_empty; ) { \
+ for (u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
+ u->avl_right != peer_avl_empty_rcu; ) { \
v = &u->avl_right; \
*stackptr++ = v; \
- u = *v; \
+ u = rcu_dereference_protected(*v, \
+ lockdep_is_held(&peers.lock)); \
} \
u; \
})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
* Variable names are the proof of operation correctness.
* Look into mm/map_avl.c for more detail description of the ideas.
*/
-static void peer_avl_rebalance(struct inet_peer **stack[],
- struct inet_peer ***stackend)
+static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
+ struct inet_peer __rcu ***stackend)
{
- struct inet_peer **nodep, *node, *l, *r;
+ struct inet_peer __rcu **nodep;
+ struct inet_peer *node, *l, *r;
int lh, rh;
while (stackend > stack) {
nodep = *--stackend;
- node = *nodep;
- l = node->avl_left;
- r = node->avl_right;
+ node = rcu_dereference_protected(*nodep,
+ lockdep_is_held(&peers.lock));
+ l = rcu_dereference_protected(node->avl_left,
+ lockdep_is_held(&peers.lock));
+ r = rcu_dereference_protected(node->avl_right,
+ lockdep_is_held(&peers.lock));
lh = node_height(l);
rh = node_height(r);
if (lh > rh + 1) { /* l: RH+2 */
struct inet_peer *ll, *lr, *lrl, *lrr;
int lrh;
- ll = l->avl_left;
- lr = l->avl_right;
+ ll = rcu_dereference_protected(l->avl_left,
+ lockdep_is_held(&peers.lock));
+ lr = rcu_dereference_protected(l->avl_right,
+ lockdep_is_held(&peers.lock));
lrh = node_height(lr);
if (lrh <= node_height(ll)) { /* ll: RH+1 */
- node->avl_left = lr; /* lr: RH or RH+1 */
- node->avl_right = r; /* r: RH */
+ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = lrh + 1; /* RH+1 or RH+2 */
- l->avl_left = ll; /* ll: RH+1 */
- l->avl_right = node; /* node: RH+1 or RH+2 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
+ RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
l->avl_height = node->avl_height + 1;
- *nodep = l;
+ RCU_INIT_POINTER(*nodep, l);
} else { /* ll: RH, lr: RH+1 */
- lrl = lr->avl_left; /* lrl: RH or RH-1 */
- lrr = lr->avl_right; /* lrr: RH or RH-1 */
- node->avl_left = lrr; /* lrr: RH or RH-1 */
- node->avl_right = r; /* r: RH */
+ lrl = rcu_dereference_protected(lr->avl_left,
+ lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
+ lrr = rcu_dereference_protected(lr->avl_right,
+ lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
+ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
node->avl_height = rh + 1; /* node: RH+1 */
- l->avl_left = ll; /* ll: RH */
- l->avl_right = lrl; /* lrl: RH or RH-1 */
+ RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
+ RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
l->avl_height = rh + 1; /* l: RH+1 */
- lr->avl_left = l; /* l: RH+1 */
- lr->avl_right = node; /* node: RH+1 */
+ RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
+ RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
lr->avl_height = rh + 2;
- *nodep = lr;
+ RCU_INIT_POINTER(*nodep, lr);
}
} else if (rh > lh + 1) { /* r: LH+2 */
struct inet_peer *rr, *rl, *rlr, *rll;
int rlh;
- rr = r->avl_right;
- rl = r->avl_left;
+ rr = rcu_dereference_protected(r->avl_right,
+ lockdep_is_held(&peers.lock));
+ rl = rcu_dereference_protected(r->avl_left,
+ lockdep_is_held(&peers.lock));
rlh = node_height(rl);
if (rlh <= node_height(rr)) { /* rr: LH+1 */
- node->avl_right = rl; /* rl: LH or LH+1 */
- node->avl_left = l; /* l: LH */
+ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = rlh + 1; /* LH+1 or LH+2 */
- r->avl_right = rr; /* rr: LH+1 */
- r->avl_left = node; /* node: LH+1 or LH+2 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
+ RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
r->avl_height = node->avl_height + 1;
- *nodep = r;
+ RCU_INIT_POINTER(*nodep, r);
} else { /* rr: RH, rl: RH+1 */
- rlr = rl->avl_right; /* rlr: LH or LH-1 */
- rll = rl->avl_left; /* rll: LH or LH-1 */
- node->avl_right = rll; /* rll: LH or LH-1 */
- node->avl_left = l; /* l: LH */
+ rlr = rcu_dereference_protected(rl->avl_right,
+ lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
+ rll = rcu_dereference_protected(rl->avl_left,
+ lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
+ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
node->avl_height = lh + 1; /* node: LH+1 */
- r->avl_right = rr; /* rr: LH */
- r->avl_left = rlr; /* rlr: LH or LH-1 */
+ RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
+ RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
r->avl_height = lh + 1; /* r: LH+1 */
- rl->avl_right = r; /* r: LH+1 */
- rl->avl_left = node; /* node: LH+1 */
+ RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
+ RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
rl->avl_height = lh + 2;
- *nodep = rl;
+ RCU_INIT_POINTER(*nodep, rl);
}
} else {
node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
#define link_to_pool(n) \
do { \
n->avl_height = 1; \
- n->avl_left = peer_avl_empty; \
- n->avl_right = peer_avl_empty; \
- smp_wmb(); /* lockless readers can catch us now */ \
- **--stackptr = n; \
+ n->avl_left = peer_avl_empty_rcu; \
+ n->avl_right = peer_avl_empty_rcu; \
+ /* lockless readers can catch us now */ \
+ rcu_assign_pointer(**--stackptr, n); \
peer_avl_rebalance(stack, stackptr); \
} while (0)
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
* We use refcnt=-1 to alert lockless readers this entry is deleted.
*/
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
- struct inet_peer **stack[PEER_MAXDEPTH];
- struct inet_peer ***stackptr, ***delp;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH];
+ struct inet_peer __rcu ***stackptr, ***delp;
if (lookup(p->v4daddr, stack) != p)
BUG();
delp = stackptr - 1; /* *delp[0] == p */
- if (p->avl_left == peer_avl_empty) {
+ if (p->avl_left == peer_avl_empty_rcu) {
*delp[0] = p->avl_right;
--stackptr;
} else {
/* look for a node to insert instead of p */
struct inet_peer *t;
t = lookup_rightempty(p);
- BUG_ON(*stackptr[-1] != t);
+ BUG_ON(rcu_dereference_protected(*stackptr[-1],
+ lockdep_is_held(&peers.lock)) != t);
**--stackptr = t->avl_left;
/* t is removed, t->v4daddr > x->v4daddr for any
* x in p->avl_left subtree.
* Put t in the old place of p. */
- *delp[0] = t;
+ RCU_INIT_POINTER(*delp[0], t);
t->avl_left = p->avl_left;
t->avl_right = p->avl_right;
t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
struct inet_peer *inet_getpeer(__be32 daddr, int create)
{
struct inet_peer *p;
- struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
/* Look up for the address quickly, lockless.
* Because of a concurrent writer, we might not find an existing entry.
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ffcbe369b7..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1072,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
ipgre_tunnel_unlink(ign, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
t->parms.i_key = p.i_key;
@@ -1324,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
tunnel->dev = dev;
strcpy(tunnel->parms.name, dev->name);
@@ -1335,7 +1335,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
tunnel->hlen = sizeof(struct iphdr) + 4;
dev_hold(dev);
- rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
}
@@ -1382,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
if ((err = register_netdev(ign->fb_tunnel_dev)))
goto err_reg_dev;
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
return 0;
err_reg_dev:
- free_netdev(ign->fb_tunnel_dev);
+ ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
but receiver should be enough clever f.e. to forward mtrace requests,
sent to multicast group to reach destination designated router.
*/
-struct ip_ra_chain *ip_ra_chain;
+struct ip_ra_chain __rcu *ip_ra_chain;
static DEFINE_SPINLOCK(ip_ra_lock);
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
int ip_ra_control(struct sock *sk, unsigned char on,
void (*destructor)(struct sock *))
{
- struct ip_ra_chain *ra, *new_ra, **rap;
+ struct ip_ra_chain *ra, *new_ra;
+ struct ip_ra_chain __rcu **rap;
if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
spin_lock_bh(&ip_ra_lock);
- for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
+ for (rap = &ip_ra_chain;
+ (ra = rcu_dereference_protected(*rap,
+ lockdep_is_held(&ip_ra_lock))) != NULL;
+ rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
spin_unlock_bh(&ip_ra_lock);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e9b816e6cd73..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -676,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
t = netdev_priv(dev);
ipip_tunnel_unlink(ipn, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3cad2591ace0..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d31b007a6d80..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 295c97431e43..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,26 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
return rcu_dereference(nf_nat_protos[protonum]);
}
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
@@ -588,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+ const struct nf_nat_protocol *p;
+
+ rcu_read_lock();
+ p = __nf_nat_proto_find(protonum);
+ if (!try_module_get(p->me))
+ p = &nf_nat_unknown_protocol;
+ rcu_read_unlock();
+
+ return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+ module_put(p->me);
+}
+
static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..b14ec7d03b6e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
local_bh_enable();
socket_seq_show(seq);
- seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+ seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
tcp_death_row.tw_count, sockets,
- atomic_read(&tcp_memory_allocated));
- seq_printf(seq, "UDP: inuse %d mem %d\n",
+ atomic_long_read(&tcp_memory_allocated));
+ seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
- atomic_read(&udp_memory_allocated));
+ atomic_long_read(&udp_memory_allocated));
seq_printf(seq, "UDPLITE: inuse %d\n",
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+ SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 65699c24411c..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
+const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
/*
* Add a protocol handler to the hash tables
@@ -38,7 +38,8 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int hash = protocol & (MAX_INET_PROTOS - 1);
- return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
+ return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet_add_protocol);
@@ -50,7 +51,8 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
+ ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d6cb2bfcd8e1..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -198,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
*/
struct rt_hash_bucket {
- struct rtable *chain;
+ struct rtable __rcu *chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rt_hash_table[st->bucket].chain)
+ if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
{
struct rt_cache_iter_state *st = seq->private;
- r = r->dst.rt_next;
+ r = rcu_dereference_bh(r->dst.rt_next);
while (!r) {
rcu_read_unlock_bh();
do {
if (--st->bucket < 0)
return NULL;
- } while (!rt_hash_table[st->bucket].chain);
+ } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
+ r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
- return rcu_dereference_bh(r);
+ return r;
}
static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -721,19 +721,23 @@ static void rt_do_flush(int process_context)
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
cond_resched();
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_raw(rt_hash_table[i].chain);
if (!rth)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
- struct rtable ** prev, * p;
+ struct rtable __rcu **prev;
+ struct rtable *p;
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
- for (tail = rth; tail; tail = tail->dst.rt_next)
+ for (tail = rth; tail;
+ tail = rcu_dereference_protected(tail->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
@@ -741,8 +745,12 @@ static void rt_do_flush(int process_context)
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
- for (p = *prev; p; p = next) {
- next = p->dst.rt_next;
+ for (p = rcu_dereference_protected(*prev,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ p != NULL;
+ p = next) {
+ next = rcu_dereference_protected(p->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
} else {
@@ -752,14 +760,15 @@ static void rt_do_flush(int process_context)
}
}
#else
- rth = rt_hash_table[i].chain;
- rt_hash_table[i].chain = NULL;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
- next = rth->dst.rt_next;
+ next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
}
}
@@ -790,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
while (aux != rth) {
if (compare_hash_inputs(&aux->fl, &rth->fl))
return 0;
- aux = aux->dst.rt_next;
+ aux = rcu_dereference_protected(aux->dst.rt_next, 1);
}
return ONE;
}
@@ -799,7 +808,8 @@ static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
@@ -825,11 +835,12 @@ static void rt_check_expire(void)
samples++;
- if (*rthp == NULL)
+ if (rcu_dereference_raw(*rthp) == NULL)
continue;
length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
prefetch(rth->dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
@@ -941,7 +952,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
static unsigned long last_gc;
static int rover;
static int equilibrium;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long now = jiffies;
int goal;
int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
@@ -1071,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
while (rth) {
length += has_noalias(head, rth);
- rth = rth->dst.rt_next;
+ rth = rcu_dereference_protected(rth->dst.rt_next, 1);
}
return length >> FRACT_BITS;
}
@@ -1079,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb, int ifindex)
{
- struct rtable *rth, **rthp;
+ struct rtable *rth, *cand;
+ struct rtable __rcu **rthp, **candp;
unsigned long now;
- struct rtable *cand, **candp;
u32 min_score;
int chain_length;
int attempts = !in_softirq();
@@ -1128,7 +1141,8 @@ restart:
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
rt_free(rth);
@@ -1324,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
static void rt_del(unsigned hash, struct rtable *rt)
{
- struct rtable **rthp, *aux;
+ struct rtable __rcu **rthp;
+ struct rtable *aux;
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
- while ((aux = *rthp) != NULL) {
+ while ((aux = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->dst.rt_next;
rt_free(aux);
@@ -1346,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
{
int i, k;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
struct netevent_redirect netevent;
@@ -1379,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
rt_genid(net));
- rthp=&rt_hash_table[hash].chain;
+ rthp = &rt_hash_table[hash].chain;
while ((rth = rcu_dereference(*rthp)) != NULL) {
struct rtable *rt;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..1b4ec21497a4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_adv_win_scale_min = -31;
+static int tcp_adv_win_scale_max = 31;
/* Update system visible IP port range */
static void set_local_port_range(int range[2])
@@ -398,7 +400,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_mem,
.maxlen = sizeof(sysctl_tcp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "tcp_wmem",
@@ -426,7 +428,9 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_adv_win_scale,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_adv_win_scale_min,
+ .extra2 = &tcp_adv_win_scale_max,
},
{
.procname = "tcp_tw_reuse",
@@ -602,8 +606,7 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..f15c36a706ec 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
-atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
* know which interface is going to be used */
- if (val < 8 || val > MAX_TCP_WINDOW) {
+ if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
err = -EINVAL;
break;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3357f69e353d..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
sizeof(struct sk_buff);
- if (sk->sk_sndbuf < 3 * sndmem)
- sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+ if (sk->sk_sndbuf < 3 * sndmem) {
+ sk->sk_sndbuf = 3 * sndmem;
+ if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+ sk->sk_sndbuf = sysctl_tcp_wmem[2];
+ }
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
}
@@ -4861,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
- if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+ if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8f8527d41682..e13da6de1fc7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ if (sock_owned_by_user(sk))
+ break;
+
icsk->icsk_backoff--;
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
- } else if (sock_owned_by_user(sk)) {
- /* RTO revert clocked out retransmission,
- * but socket is locked. Will defer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- HZ/20, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
@@ -2045,7 +2043,9 @@ get_req:
}
get_sk:
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (sk->sk_family == st->family) {
cur = sk;
goto out;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43cf901d7659..a66735f75963 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -347,7 +347,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
* socket up. We've got bigger problems than
* non-graceful socket closings.
*/
- LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW);
}
tcp_update_metrics(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf36763..61c2463e2753 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -231,11 +231,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
- if (init_rcv_wnd &&
- (*rcv_wnd > init_rcv_wnd * mss))
- *rcv_wnd = init_rcv_wnd * mss;
- else if (*rcv_wnd > init_cwnd * mss)
- *rcv_wnd = init_cwnd * mss;
+ if (init_rcv_wnd)
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+ else
+ *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
}
/* Set the clamp no higher than max representable value */
@@ -386,27 +385,30 @@ struct tcp_out_options {
*/
static u8 tcp_cookie_size_check(u8 desired)
{
- if (desired > 0) {
+ int cookie_size;
+
+ if (desired > 0)
/* previously specified */
return desired;
- }
- if (sysctl_tcp_cookie_size <= 0) {
+
+ cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+ if (cookie_size <= 0)
/* no default specified */
return 0;
- }
- if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+ if (cookie_size <= TCP_COOKIE_MIN)
/* value too small, specify minimum */
return TCP_COOKIE_MIN;
- }
- if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+ if (cookie_size >= TCP_COOKIE_MAX)
/* value too large, specify maximum */
return TCP_COOKIE_MAX;
- }
- if (0x1 & sysctl_tcp_cookie_size) {
+
+ if (cookie_size & 1)
/* 8-bit multiple, illegal, fix it */
- return (u8)(sysctl_tcp_cookie_size + 0x1);
- }
- return (u8)sysctl_tcp_cookie_size;
+ cookie_size++;
+
+ return (u8)cookie_size;
}
/* Write previously computed TCP options to the packet.
@@ -1513,6 +1515,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 send_win, cong_win, limit, in_flight;
+ int win_divisor;
if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
goto send_now;
@@ -1544,13 +1547,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- if (sysctl_tcp_tso_win_divisor) {
+ win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+ if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
*/
- chunk /= sysctl_tcp_tso_win_divisor;
+ chunk /= win_divisor;
if (limit >= chunk)
goto send_now;
} else {
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 9a17bd2a0a37..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,27 +14,32 @@
#include <net/protocol.h>
#include <net/xfrm.h>
-static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
-static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
+static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
static DEFINE_MUTEX(tunnel4_mutex);
-static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
+static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
{
return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
}
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
+
int ret = -EEXIST;
int priority = handler->priority;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if ((*pprev)->priority > priority)
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
break;
- if ((*pprev)->priority == priority)
+ if (t->priority == priority)
goto err;
}
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
{
- struct xfrm_tunnel **pprev;
+ struct xfrm_tunnel __rcu **pprev;
+ struct xfrm_tunnel *t;
int ret = -ENOENT;
mutex_lock(&tunnel4_mutex);
- for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) {
- if (*pprev == handler) {
+ for (pprev = fam_handlers(family);
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel4_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
*pprev = handler->next;
ret = 0;
break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b3f7e8cf18ac..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
-int sysctl_udp_mem[3] __read_mostly;
+long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);
int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);
-atomic_t udp_memory_allocated;
+atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter) {
+ if (rcu_dereference_raw(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ec7a91d9e865..93b7a933a775 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -98,7 +98,11 @@
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
-#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
+
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+ return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
#define ADDRCONF_TIMER_FUZZ (HZ / 4)
@@ -836,7 +840,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
+ unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
@@ -886,12 +890,13 @@ retry:
goto out;
}
memcpy(&addr.s6_addr[8], idev->rndid, 8);
+ age = (jiffies - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
ifp->valid_lft,
- idev->cnf.temp_valid_lft);
+ idev->cnf.temp_valid_lft + age);
tmp_prefered_lft = min_t(__u32,
ifp->prefered_lft,
- idev->cnf.temp_prefered_lft -
+ idev->cnf.temp_prefered_lft + age -
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
max_addresses = idev->cnf.max_addresses;
@@ -1426,8 +1431,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
- if (addrconf_dad_end(ifp))
+ if (addrconf_dad_end(ifp)) {
+ in6_ifa_put(ifp);
return;
+ }
if (net_ratelimit())
printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
@@ -2021,10 +2028,11 @@ ok:
ipv6_ifa_notify(0, ift);
}
- if (create && in6_dev->cnf.use_tempaddr > 0) {
+ if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
/*
* When a new public address is created as described in [ADDRCONF],
- * also create a new temporary address.
+ * also create a new temporary address. Also create a temporary
+ * address if it's enabled but no temporary address currently exists.
*/
read_unlock_bh(&in6_dev->lock);
ipv6_create_tempaddr(ifp, NULL);
@@ -2736,10 +2744,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* Flag it for later restoration when link comes up */
ifa->flags |= IFA_F_TENTATIVE;
ifa->state = INET6_IFADDR_STATE_DAD;
-
- write_unlock_bh(&idev->lock);
-
- in6_ifa_hold(ifa);
} else {
list_del(&ifa->if_list);
@@ -2754,19 +2758,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
ifa->state = INET6_IFADDR_STATE_DEAD;
spin_unlock_bh(&ifa->state_lock);
- if (state == INET6_IFADDR_STATE_DEAD)
- goto put_ifa;
- }
-
- __ipv6_ifa_notify(RTM_DELADDR, ifa);
- if (ifa->state == INET6_IFADDR_STATE_DEAD)
- atomic_notifier_call_chain(&inet6addr_chain,
- NETDEV_DOWN, ifa);
-
-put_ifa:
- in6_ifa_put(ifa);
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ atomic_notifier_call_chain(&inet6addr_chain,
+ NETDEV_DOWN, ifa);
+ }
- write_lock_bh(&idev->lock);
+ in6_ifa_put(ifa);
+ write_lock_bh(&idev->lock);
+ }
}
list_splice(&keep_list, &idev->addr_list);
@@ -3448,10 +3448,8 @@ static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
{
struct ifa_cacheinfo ci;
- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ ci.cstamp = cstamp_delta(cstamp);
+ ci.tstamp = cstamp_delta(tstamp);
ci.ifa_prefered = preferred;
ci.ifa_valid = valid;
@@ -3802,8 +3800,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_AUTOCONF] = cnf->autoconf;
array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
+ array[DEVCONF_RTR_SOLICIT_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_solicit_interval);
+ array[DEVCONF_RTR_SOLICIT_DELAY] =
+ jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
@@ -3817,7 +3817,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
+ array[DEVCONF_RTR_PROBE_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_probe_interval);
#ifdef CONFIG_IPV6_ROUTE_INFO
array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
#endif
@@ -3933,10 +3934,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
- ci.reachable_time = idev->nd_parms->reachable_time;
- ci.retrans_time = idev->nd_parms->retrans_time;
+ ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+ ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
@@ -4021,11 +4021,11 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c2c0f89397b1..70e891a20fb9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1175,6 +1175,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
sizeof (struct ipv6hdr);
dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1284,6 +1286,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
t = netdev_priv(dev);
ip6_tnl_unlink(ip6n, t);
+ synchronize_net();
err = ip6_tnl_change(t, &p);
ip6_tnl_link(ip6n, t);
netdev_state_change(dev);
@@ -1362,15 +1365,21 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
+ struct ip6_tnl *t;
+
dev->netdev_ops = &ip6_tnl_netdev_ops;
dev->destructor = ip6_dev_free;
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu-=8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0553867a317f..d1770e061c08 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -343,6 +343,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
+ if (!capable(CAP_NET_ADMIN)) {
+ retv = -EPERM;
+ break;
+ }
if (optlen < sizeof(int))
goto e_inval;
/* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 44d2eeac089b..448464844a25 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,15 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+config NF_DEFRAG_IPV6
+ tristate
+ default n
+
config NF_CONNTRACK_IPV6
tristate "IPv6 connection tracking support"
depends on INET && IPV6 && NF_CONNTRACK
default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV6
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3f8e4a3d83ce..0a432c9b0795 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -12,11 +12,14 @@ obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+# defrag
+nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 51df035897e7..455582384ece 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 489d71b844ac..79d43aa8fa8d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -286,7 +286,7 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+ (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
@@ -625,21 +625,24 @@ int nf_ct_frag6_init(void)
inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
+#ifdef CONFIG_SYSCTL
nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
nf_ct_frag6_sysctl_table);
if (!nf_ct_frag6_sysctl_header) {
inet_frags_fini(&nf_frags);
return -ENOMEM;
}
+#endif
return 0;
}
void nf_ct_frag6_cleanup(void)
{
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nf_ct_frag6_sysctl_header);
nf_ct_frag6_sysctl_header = NULL;
-
+#endif
inet_frags_fini(&nf_frags);
nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa25..24b3558b8e67 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
+ SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
+ SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 9bb936ae2452..9a7978fdc02a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,13 +25,14 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
{
int hash = protocol & (MAX_INET_PROTOS - 1);
- return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
+ return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ NULL, prot) ? 0 : -1;
}
EXPORT_SYMBOL(inet6_add_protocol);
@@ -43,7 +44,8 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
{
int ret, hash = protocol & (MAX_INET_PROTOS - 1);
- ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
+ prot, NULL) == prot) ? 0 : -1;
synchronize_net();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 45e6efb7f171..86c39526ba5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -373,7 +373,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
- if ((raw6_sk(sk)->checksum || sk->sk_filter) &&
+ if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c7ba3149633f..0f2766453759 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) - offset > 0)
+ (FRAG6_CB(prev)->offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25661f968f3f..96455ffb76fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1945,8 +1945,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
struct neighbour *neigh;
- if (rt == NULL)
+ if (rt == NULL) {
+ if (net_ratelimit())
+ pr_warning("IPv6: Maximum number of routes reached,"
+ " consider increasing route/max_size.\n");
return ERR_PTR(-ENOMEM);
+ }
dev_hold(net->loopback_dev);
in6_dev_hold(idev);
@@ -2741,6 +2745,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
kfree(net->ipv6.ip6_prohibit_entry);
kfree(net->ipv6.ip6_blk_hole_entry);
#endif
+ dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
static struct pernet_operations ip6_route_net_ops = {
@@ -2832,5 +2837,6 @@ void ip6_route_cleanup(void)
xfrm6_fini();
fib6_gc_cleanup();
unregister_pernet_subsys(&ip6_route_net_ops);
+ dst_entries_destroy(&ip6_dst_blackhole_ops);
kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 367a6cc584cc..8c4d00c7cd2b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -606,8 +606,9 @@ static int ipip6_rcv(struct sk_buff *skb)
return 0;
}
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+ /* no tunnel matched, let upstream know, ipsec may handle it */
rcu_read_unlock();
+ return 1;
out:
kfree_skb(skb);
return 0;
@@ -963,6 +964,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
}
t = netdev_priv(dev);
ipip6_tunnel_unlink(sitn, t);
+ synchronize_net();
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
memcpy(dev->dev_addr, &p.iph.saddr, 4);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index d9864725d0c6..4f3cec12aa85 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,23 +30,26 @@
#include <net/protocol.h>
#include <net/xfrm.h>
-static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
-static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
static DEFINE_MUTEX(tunnel6_mutex);
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
{
- struct xfrm6_tunnel **pprev;
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
int ret = -EEXIST;
int priority = handler->priority;
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- *pprev; pprev = &(*pprev)->next) {
- if ((*pprev)->priority > priority)
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t->priority > priority)
break;
- if ((*pprev)->priority == priority)
+ if (t->priority == priority)
goto err;
}
@@ -65,14 +68,17 @@ EXPORT_SYMBOL(xfrm6_tunnel_register);
int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
{
- struct xfrm6_tunnel **pprev;
+ struct xfrm6_tunnel __rcu **pprev;
+ struct xfrm6_tunnel *t;
int ret = -ENOENT;
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- *pprev; pprev = &(*pprev)->next) {
- if (*pprev == handler) {
+ (t = rcu_dereference_protected(*pprev,
+ lockdep_is_held(&tunnel6_mutex))) != NULL;
+ pprev = &t->next) {
+ if (t == handler) {
*pprev = handler->next;
ret = 0;
break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c84dad432114..91def93bec85 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -527,7 +527,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
}
}
- if (sk->sk_filter) {
+ if (rcu_dereference_raw(sk->sk_filter)) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 285761e77d90..f6054f9ccbe3 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -550,22 +550,30 @@ EXPORT_SYMBOL(irttp_close_tsap);
*/
int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
{
+ int ret;
+
IRDA_ASSERT(self != NULL, return -1;);
IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;);
IRDA_ASSERT(skb != NULL, return -1;);
IRDA_DEBUG(4, "%s()\n", __func__);
+ /* Take shortcut on zero byte packets */
+ if (skb->len == 0) {
+ ret = 0;
+ goto err;
+ }
+
/* Check that nothing bad happens */
- if ((skb->len == 0) || (!self->connected)) {
- IRDA_DEBUG(1, "%s(), No data, or not connected\n",
- __func__);
+ if (!self->connected) {
+ IRDA_WARNING("%s(), Not connected\n", __func__);
+ ret = -ENOTCONN;
goto err;
}
if (skb->len > self->max_seg_size) {
- IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
- __func__);
+ IRDA_ERROR("%s(), UData is too large for IrLAP!\n", __func__);
+ ret = -EMSGSIZE;
goto err;
}
@@ -576,7 +584,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
err:
dev_kfree_skb(skb);
- return -1;
+ return ret;
}
EXPORT_SYMBOL(irttp_udata_request);
@@ -599,9 +607,15 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
IRDA_DEBUG(2, "%s() : queue len = %d\n", __func__,
skb_queue_len(&self->tx_queue));
+ /* Take shortcut on zero byte packets */
+ if (skb->len == 0) {
+ ret = 0;
+ goto err;
+ }
+
/* Check that nothing bad happens */
- if ((skb->len == 0) || (!self->connected)) {
- IRDA_WARNING("%s: No data, or not connected\n", __func__);
+ if (!self->connected) {
+ IRDA_WARNING("%s: Not connected\n", __func__);
ret = -ENOTCONN;
goto err;
}
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 499c045d6910..f7db676de77d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1798,7 +1798,8 @@ static void iucv_work_fn(struct work_struct *work)
* Handles external interrupts coming in from CP.
* Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
*/
-static void iucv_external_interrupt(u16 code)
+static void iucv_external_interrupt(unsigned int ext_int_code,
+ unsigned int param32, unsigned long param64)
{
struct iucv_irq_data *p;
struct iucv_irq_list *work;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1712af1c7b3f..c64ce0a0bb03 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,10 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
+static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+
static inline struct l2tp_net *l2tp_pernet(struct net *net)
{
BUG_ON(!net);
@@ -118,6 +122,34 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
return net_generic(net, l2tp_net_id);
}
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel.
+ */
+static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
+{
+ atomic_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
+{
+ if (atomic_dec_and_test(&tunnel->ref_count))
+ l2tp_tunnel_free(tunnel);
+}
+#ifdef L2TP_REFCNT_DEBUG
+#define l2tp_tunnel_inc_refcount(_t) do { \
+ printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
+ l2tp_tunnel_inc_refcount_1(_t); \
+ } while (0)
+#define l2tp_tunnel_dec_refcount(_t) do { \
+ printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
+ l2tp_tunnel_dec_refcount_1(_t); \
+ } while (0)
+#else
+#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
+#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
+#endif
+
/* Session hash global list for L2TPv3.
* The session_id SHOULD be random according to RFC3931, but several
* L2TP implementations use incrementing session_ids. So we do a real
@@ -699,8 +731,8 @@ EXPORT_SYMBOL(l2tp_recv_common);
* Returns 1 if the packet was not a good data packet and could not be
* forwarded. All such packets are passed up to userspace to deal with.
*/
-int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
- int (*payload_hook)(struct sk_buff *skb))
+static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+ int (*payload_hook)(struct sk_buff *skb))
{
struct l2tp_session *session = NULL;
unsigned char *ptr, *optr;
@@ -812,7 +844,6 @@ error:
return 1;
}
-EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
/* UDP encapsulation receive handler. See net/ipv4/udp.c.
* Return codes:
@@ -922,7 +953,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
return bufp - optr;
}
-int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
+static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
+ size_t data_len)
{
struct l2tp_tunnel *tunnel = session->tunnel;
unsigned int len = skb->len;
@@ -970,7 +1002,6 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
return 0;
}
-EXPORT_SYMBOL_GPL(l2tp_xmit_core);
/* Automatically called when the skb is freed.
*/
@@ -1089,7 +1120,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
* The tunnel context is deleted only when all session sockets have been
* closed.
*/
-void l2tp_tunnel_destruct(struct sock *sk)
+static void l2tp_tunnel_destruct(struct sock *sk)
{
struct l2tp_tunnel *tunnel;
@@ -1128,11 +1159,10 @@ void l2tp_tunnel_destruct(struct sock *sk)
end:
return;
}
-EXPORT_SYMBOL(l2tp_tunnel_destruct);
/* When the tunnel is closed, all the attached sessions need to go too.
*/
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
{
int hash;
struct hlist_node *walk;
@@ -1193,12 +1223,11 @@ again:
}
write_unlock_bh(&tunnel->hlist_lock);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
/* Really kill the tunnel.
* Come here only when all sessions have been cleared from the tunnel.
*/
-void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
@@ -1217,7 +1246,6 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
atomic_dec(&l2tp_tunnel_count);
kfree(tunnel);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
/* Create a socket for the tunnel, if one isn't set up by
* userspace. This is used for static tunnels where there is no
@@ -1512,7 +1540,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_len parameters are set.
*/
-void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1525,7 +1553,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
}
}
-EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index f0f318edd3f1..a16a48e79fab 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,48 +231,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
extern int l2tp_session_delete(struct l2tp_session *session);
-extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
extern void l2tp_session_free(struct l2tp_session *session);
extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
-extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
-extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
-extern void l2tp_tunnel_destruct(struct sock *sk);
-extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
- atomic_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
- if (atomic_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t) do { \
- printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
- l2tp_tunnel_inc_refcount_1(_t); \
- } while (0)
-#define l2tp_tunnel_dec_refcount(_t) do { \
- printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
- l2tp_tunnel_dec_refcount_1(_t); \
- } while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d4..b8dbae82fab8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
struct seq_file *seq;
int rc = -ENOMEM;
- pd = kzalloc(GFP_KERNEL, sizeof(*pd));
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (pd == NULL)
goto out;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 1c770c0644d1..522e219f3558 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -576,7 +576,7 @@ out:
return copied;
}
-struct proto l2tp_ip_prot = {
+static struct proto l2tp_ip_prot = {
.name = "L2TP/IP",
.owner = THIS_MODULE,
.init = l2tp_ip_open,
@@ -674,4 +674,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("L2TP over IP");
MODULE_VERSION("1.0");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
+
+/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+ * enums
+ */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 582612998211..e35dbe55f520 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -317,8 +317,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
goto out;
rc = -ENODEV;
rtnl_lock();
+ rcu_read_lock();
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
if (llc->dev) {
if (!addr->sllc_arphrd)
addr->sllc_arphrd = llc->dev->type;
@@ -329,13 +330,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
!llc_mac_match(addr->sllc_mac,
llc->dev->dev_addr)) {
rc = -EINVAL;
- dev_put(llc->dev);
llc->dev = NULL;
}
}
} else
llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
+ rcu_read_unlock();
rtnl_unlock();
if (!llc->dev)
goto out;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 43288259f4a1..1534f2b44caf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -525,6 +525,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -927,6 +928,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DEFRAG_IPV4
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index a22dac227055..70bd1d0774c6 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,6 +4,7 @@
menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER
+ depends on (NF_CONNTRACK || NF_CONNTRACK=n)
---help---
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1eacf8d9966a..27a5ea6b6a0f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1312,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
if (!hash) {
*vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
- hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL);
}
if (hash && nulls)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d92958023..dc7bb74110df 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
for (i = 0; i < MAX_NF_CT_PROTO; i++)
proto_array[i] = &nf_conntrack_l4proto_generic;
+
+ /* Before making proto_array visible to lockless readers,
+ * we must make sure its content is committed to memory.
+ */
+ smp_wmb();
+
nf_ct_protos[l4proto->l3proto] = proto_array;
} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
&nf_conntrack_l4proto_generic) {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 19c482caf30b..640678f47a2a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -21,7 +21,9 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
@@ -172,7 +174,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
static inline const struct in6_addr *
tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
@@ -372,7 +374,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
.hooks = 1 << NF_INET_PRE_ROUTING,
.me = THIS_MODULE,
},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
{
.name = "TPROXY",
.family = NFPROTO_IPV6,
@@ -391,7 +393,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
static int __init tproxy_tg_init(void)
{
nf_defrag_ipv4_enable();
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_TPROXY_HAVE_IPV6
nf_defrag_ipv6_enable();
#endif
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2dbd4c857735..00d6ae838303 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,7 +14,6 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>
@@ -22,7 +21,12 @@
#include <net/inet_sock.h>
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#define XT_SOCKET_HAVE_IPV6 1
+#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
#include <linux/netfilter/xt_socket.h>
@@ -186,12 +190,12 @@ socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
return socket_match(skb, par, par->matchinfo);
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
static int
extract_icmp6_fields(const struct sk_buff *skb,
unsigned int outside_hdrlen,
- u8 *protocol,
+ int *protocol,
struct in6_addr **raddr,
struct in6_addr **laddr,
__be16 *rport,
@@ -248,8 +252,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk;
struct in6_addr *daddr, *saddr;
__be16 dport, sport;
- int thoff;
- u8 tproto;
+ int thoff, tproto;
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
@@ -301,7 +304,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu "
+ pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
"(orig %pI6:%hu) sock %p\n",
tproto, saddr, ntohs(sport),
daddr, ntohs(dport),
@@ -331,7 +334,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
{
.name = "socket",
.revision = 1,
@@ -348,7 +351,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
static int __init socket_mt_init(void)
{
nf_defrag_ipv4_enable();
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#ifdef XT_SOCKET_HAVE_IPV6
nf_defrag_ipv6_enable();
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cd96ed3ccee4..478181d53c55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,9 +83,9 @@ struct netlink_sock {
struct module *module;
};
-struct listeners_rcu_head {
- struct rcu_head rcu_head;
- void *ptr;
+struct listeners {
+ struct rcu_head rcu;
+ unsigned long masks[0];
};
#define NETLINK_KERNEL_SOCKET 0x1
@@ -119,7 +119,7 @@ struct nl_pid_hash {
struct netlink_table {
struct nl_pid_hash hash;
struct hlist_head mc_list;
- unsigned long *listeners;
+ struct listeners __rcu *listeners;
unsigned int nl_nonroot;
unsigned int groups;
struct mutex *cb_mutex;
@@ -338,7 +338,7 @@ netlink_update_listeners(struct sock *sk)
if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
mask |= nlk_sk(sk)->groups[i];
}
- tbl->listeners[i] = mask;
+ tbl->listeners->masks[i] = mask;
}
/* this function is only called with the netlink table "grabbed", which
* makes sure updates are visible before bind or setsockopt return. */
@@ -936,7 +936,7 @@ EXPORT_SYMBOL(netlink_unicast);
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
int res = 0;
- unsigned long *listeners;
+ struct listeners *listeners;
BUG_ON(!netlink_is_kernel(sk));
@@ -944,7 +944,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
if (group - 1 < nl_table[sk->sk_protocol].groups)
- res = test_bit(group - 1, listeners);
+ res = test_bit(group - 1, listeners->masks);
rcu_read_unlock();
@@ -1498,7 +1498,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
struct socket *sock;
struct sock *sk;
struct netlink_sock *nlk;
- unsigned long *listeners = NULL;
+ struct listeners *listeners = NULL;
BUG_ON(!nl_table);
@@ -1523,8 +1523,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
if (groups < 32)
groups = 32;
- listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
- GFP_KERNEL);
+ listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
goto out_sock_release;
@@ -1541,7 +1540,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
netlink_table_grab();
if (!nl_table[unit].registered) {
nl_table[unit].groups = groups;
- nl_table[unit].listeners = listeners;
+ rcu_assign_pointer(nl_table[unit].listeners, listeners);
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
@@ -1572,43 +1571,28 @@ netlink_kernel_release(struct sock *sk)
EXPORT_SYMBOL(netlink_kernel_release);
-static void netlink_free_old_listeners(struct rcu_head *rcu_head)
+static void listeners_free_rcu(struct rcu_head *head)
{
- struct listeners_rcu_head *lrh;
-
- lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
- kfree(lrh->ptr);
+ kfree(container_of(head, struct listeners, rcu));
}
int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
- unsigned long *listeners, *old = NULL;
- struct listeners_rcu_head *old_rcu_head;
+ struct listeners *new, *old;
struct netlink_table *tbl = &nl_table[sk->sk_protocol];
if (groups < 32)
groups = 32;
if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
- listeners = kzalloc(NLGRPSZ(groups) +
- sizeof(struct listeners_rcu_head),
- GFP_ATOMIC);
- if (!listeners)
+ new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
+ if (!new)
return -ENOMEM;
- old = tbl->listeners;
- memcpy(listeners, old, NLGRPSZ(tbl->groups));
- rcu_assign_pointer(tbl->listeners, listeners);
- /*
- * Free the old memory after an RCU grace period so we
- * don't leak it. We use call_rcu() here in order to be
- * able to call this function from atomic contexts. The
- * allocation of this memory will have reserved enough
- * space for struct listeners_rcu_head at the end.
- */
- old_rcu_head = (void *)(tbl->listeners +
- NLGRPLONGS(tbl->groups));
- old_rcu_head->ptr = old;
- call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
+ old = rcu_dereference_raw(tbl->listeners);
+ memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
+ rcu_assign_pointer(tbl->listeners, new);
+
+ call_rcu(&old->rcu, listeners_free_rcu);
}
tbl->groups = groups;
@@ -2104,18 +2088,17 @@ static void __net_exit netlink_net_exit(struct net *net)
static void __init netlink_add_usersock_entry(void)
{
- unsigned long *listeners;
+ struct listeners *listeners;
int groups = 32;
- listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head),
- GFP_KERNEL);
+ listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
if (!listeners)
- panic("netlink_add_usersock_entry: Cannot allocate listneres\n");
+ panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
netlink_table_grab();
nl_table[NETLINK_USERSOCK].groups = groups;
- nl_table[NETLINK_USERSOCK].listeners = listeners;
+ rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
nl_table[NETLINK_USERSOCK].registered = 1;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3616f27b9d46..8298e676f5a0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1610,9 +1610,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
err = -EINVAL;
vnet_hdr_len = sizeof(vnet_hdr);
- if ((len -= vnet_hdr_len) < 0)
+ if (len < vnet_hdr_len)
goto out_free;
+ len -= vnet_hdr_len;
+
if (skb_is_gso(skb)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
@@ -1719,7 +1721,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
if (dev)
- strlcpy(uaddr->sa_data, dev->name, 15);
+ strncpy(uaddr->sa_data, dev->name, 14);
else
memset(uaddr->sa_data, 0, 14);
rcu_read_unlock();
@@ -1742,6 +1744,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_family = AF_PACKET;
sll->sll_ifindex = po->ifindex;
sll->sll_protocol = po->num;
+ sll->sll_pkttype = 0;
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
if (dev) {
diff --git a/net/rds/loop.c b/net/rds/loop.c
index c390156b426f..aeec1d483b17 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -134,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
static void rds_loop_conn_free(void *arg)
{
struct rds_loop_connection *lc = arg;
+ unsigned long flags;
+
rdsdebug("lc %p\n", lc);
+ spin_lock_irqsave(&loop_conns_lock, flags);
list_del(&lc->loop_node);
+ spin_unlock_irqrestore(&loop_conns_lock, flags);
kfree(lc);
}
diff --git a/net/rds/message.c b/net/rds/message.c
index a84545dae370..1fd3d29023d7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -224,6 +224,9 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
WARN_ON(!nents);
+ if (rm->m_used_sgs + nents > rm->m_total_sgs)
+ return NULL;
+
sg_ret = &sg_first[rm->m_used_sgs];
sg_init_table(sg_ret, nents);
rm->m_used_sgs += nents;
@@ -246,6 +249,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
rm->data.op_nents = ceil(total_len, PAGE_SIZE);
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
+ if (!rm->data.op_sg) {
+ rds_message_put(rm);
+ return ERR_PTR(-ENOMEM);
+ }
for (i = 0; i < rm->data.op_nents; ++i) {
sg_set_page(&rm->data.op_sg[i],
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1a41debca1ce..4e37c1cbe8b2 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -479,13 +479,38 @@ void rds_atomic_free_op(struct rm_atomic_op *ao)
/*
- * Count the number of pages needed to describe an incoming iovec.
+ * Count the number of pages needed to describe an incoming iovec array.
*/
-static int rds_rdma_pages(struct rds_rdma_args *args)
+static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
+{
+ int tot_pages = 0;
+ unsigned int nr_pages;
+ unsigned int i;
+
+ /* figure out the number of pages in the vector */
+ for (i = 0; i < nr_iovecs; i++) {
+ nr_pages = rds_pages_in_vec(&iov[i]);
+ if (nr_pages == 0)
+ return -EINVAL;
+
+ tot_pages += nr_pages;
+
+ /*
+ * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+ * so tot_pages cannot overflow without first going negative.
+ */
+ if (tot_pages < 0)
+ return -EINVAL;
+ }
+
+ return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
{
struct rds_iovec vec;
struct rds_iovec __user *local_vec;
- unsigned int tot_pages = 0;
+ int tot_pages = 0;
unsigned int nr_pages;
unsigned int i;
@@ -502,14 +527,16 @@ static int rds_rdma_pages(struct rds_rdma_args *args)
return -EINVAL;
tot_pages += nr_pages;
- }
- return tot_pages;
-}
+ /*
+ * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
+ * so tot_pages cannot overflow without first going negative.
+ */
+ if (tot_pages < 0)
+ return -EINVAL;
+ }
-int rds_rdma_extra_size(struct rds_rdma_args *args)
-{
- return rds_rdma_pages(args) * sizeof(struct scatterlist);
+ return tot_pages * sizeof(struct scatterlist);
}
/*
@@ -520,13 +547,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg)
{
struct rds_rdma_args *args;
- struct rds_iovec vec;
struct rm_rdma_op *op = &rm->rdma;
int nr_pages;
unsigned int nr_bytes;
struct page **pages = NULL;
- struct rds_iovec __user *local_vec;
- unsigned int nr;
+ struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
+ int iov_size;
unsigned int i, j;
int ret = 0;
@@ -541,14 +567,31 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
goto out;
}
- if (args->nr_local > (u64)UINT_MAX) {
+ if (args->nr_local > UIO_MAXIOV) {
ret = -EMSGSIZE;
goto out;
}
- nr_pages = rds_rdma_pages(args);
- if (nr_pages < 0)
+ /* Check whether to allocate the iovec area */
+ iov_size = args->nr_local * sizeof(struct rds_iovec);
+ if (args->nr_local > UIO_FASTIOV) {
+ iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
+ if (!iovs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ nr_pages = rds_rdma_pages(iovs, args->nr_local);
+ if (nr_pages < 0) {
+ ret = -EINVAL;
goto out;
+ }
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
@@ -564,6 +607,10 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
op->op_recverr = rs->rs_recverr;
WARN_ON(!nr_pages);
op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
+ if (!op->op_sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (op->op_notify || op->op_recverr) {
/* We allocate an uninitialized notifier here, because
@@ -597,50 +644,40 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
(unsigned long long)args->remote_vec.addr,
op->op_rkey);
- local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
for (i = 0; i < args->nr_local; i++) {
- if (copy_from_user(&vec, &local_vec[i],
- sizeof(struct rds_iovec))) {
- ret = -EFAULT;
- goto out;
- }
-
- nr = rds_pages_in_vec(&vec);
- if (nr == 0) {
- ret = -EINVAL;
- goto out;
- }
+ struct rds_iovec *iov = &iovs[i];
+ /* don't need to check, rds_rdma_pages() verified nr will be +nonzero */
+ unsigned int nr = rds_pages_in_vec(iov);
- rs->rs_user_addr = vec.addr;
- rs->rs_user_bytes = vec.bytes;
+ rs->rs_user_addr = iov->addr;
+ rs->rs_user_bytes = iov->bytes;
/* If it's a WRITE operation, we want to pin the pages for reading.
* If it's a READ operation, we need to pin the pages for writing.
*/
- ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
+ ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
if (ret < 0)
goto out;
- rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
- nr_bytes, nr, vec.bytes, vec.addr);
+ rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
+ nr_bytes, nr, iov->bytes, iov->addr);
- nr_bytes += vec.bytes;
+ nr_bytes += iov->bytes;
for (j = 0; j < nr; j++) {
- unsigned int offset = vec.addr & ~PAGE_MASK;
+ unsigned int offset = iov->addr & ~PAGE_MASK;
struct scatterlist *sg;
sg = &op->op_sg[op->op_nents + j];
sg_set_page(sg, pages[j],
- min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
+ min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
offset);
- rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
- sg->offset, sg->length, vec.addr, vec.bytes);
+ rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
+ sg->offset, sg->length, iov->addr, iov->bytes);
- vec.addr += sg->length;
- vec.bytes -= sg->length;
+ iov->addr += sg->length;
+ iov->bytes -= sg->length;
}
op->op_nents += nr;
@@ -655,13 +692,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
}
op->op_bytes = nr_bytes;
- ret = 0;
out:
+ if (iovs != iovstack)
+ sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
kfree(pages);
if (ret)
rds_rdma_free_op(op);
-
- rds_stats_inc(s_send_rdma);
+ else
+ rds_stats_inc(s_send_rdma);
return ret;
}
@@ -773,6 +811,10 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
rm->atomic.op_active = 1;
rm->atomic.op_recverr = rs->rs_recverr;
rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+ if (!rm->atomic.op_sg) {
+ ret = -ENOMEM;
+ goto err;
+ }
/* verify 8 byte-aligned */
if (args->local_addr & 0x7) {
diff --git a/net/rds/send.c b/net/rds/send.c
index 0bc9db17a87d..35b9c2e9caf1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -973,6 +973,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
/* Attach data to the rm */
if (payload_len) {
rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+ if (!rm->data.op_sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
if (ret)
goto out;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 08a8c6cf2d10..8e0a32001c90 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
static void rds_tcp_conn_free(void *arg)
{
struct rds_tcp_connection *tc = arg;
+ unsigned long flags;
rdsdebug("freeing tc %p\n", tc);
+
+ spin_lock_irqsave(&rds_tcp_conn_lock, flags);
+ list_del(&tc->t_tcp_node);
+ spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
+
kmem_cache_free(rds_tcp_conn_slab, tc);
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..f23d9155b1ef 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
+ goto nla_put_failure;
+
return skb->len;
nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb1..d49c40fb7e09 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
.populate = cgrp_populate,
#ifdef CONFIG_NET_CLS_CGROUP
.subsys_id = net_cls_subsys_id,
-#else
-#define net_cls_subsys_id net_cls_subsys.subsys_id
#endif
.module = THIS_MODULE,
};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..ea8f566e720c 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
{
- textsearch_destroy(EM_TEXT_PRIV(m)->config);
+ if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+ textsearch_destroy(EM_TEXT_PRIV(m)->config);
}
static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1ef29c74d85e..e58f9476f29c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -92,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
struct kmem_cache *sctp_chunk_cachep __read_mostly;
struct kmem_cache *sctp_bucket_cachep __read_mostly;
-int sysctl_sctp_mem[3];
+long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e34ca9cc1167..6bd554323a34 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -111,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
extern struct kmem_cache *sctp_bucket_cachep;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
static int sctp_memory_pressure;
-static atomic_t sctp_memory_allocated;
+static atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c0..50cb57f0919e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
static int rwnd_scale_max = 16;
-extern int sysctl_sctp_mem[3];
+extern long sysctl_sctp_mem[3];
extern int sysctl_sctp_rmem[3];
extern int sysctl_sctp_wmem[3];
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
.data = &sysctl_sctp_mem,
.maxlen = sizeof(sysctl_sctp_mem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax
},
{
.procname = "sctp_rmem",
diff --git a/net/socket.c b/net/socket.c
index abf3e2561521..088fb3fd45e0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = {
.statfs = simple_statfs,
};
-static int sockfs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *sockfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
- mnt);
+ return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
- .get_sb = sockfs_get_sb,
+ .mount = sockfs_mount,
.kill_sb = kill_anon_super,
};
@@ -377,7 +375,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- atomic_inc(&path.dentry->d_inode->i_count);
+ ihold(path.dentry->d_inode);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;
@@ -480,6 +478,7 @@ static struct socket *sock_alloc(void)
sock = SOCKET_I(inode);
kmemcheck_annotate_bitfield(sock, type);
+ inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
@@ -733,6 +732,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
return ret;
}
+/**
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock: The socket to receive the message from
+ * @msg: Received message
+ * @vec: Input s/g array for message data
+ * @num: Size of input s/g array
+ * @size: Number of bytes to read
+ * @flags: Message flags (MSG_DONTWAIT, etc...)
+ *
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
+ *
+ * The returned value is the total number of bytes received, or an error.
+ */
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
@@ -1145,7 +1159,7 @@ call_kill:
}
EXPORT_SYMBOL(sock_wake_async);
-static int __sock_create(struct net *net, int family, int type, int protocol,
+int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
@@ -1257,6 +1271,7 @@ out_release:
rcu_read_unlock();
goto out_sock_release;
}
+EXPORT_SYMBOL(__sock_create);
int sock_create(int family, int type, int protocol, struct socket **res)
{
@@ -1652,6 +1667,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
struct iovec iov;
int fput_needed;
+ if (len > INT_MAX)
+ len = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
@@ -1709,6 +1726,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
int err, err2;
int fput_needed;
+ if (size > INT_MAX)
+ size = INT_MAX;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..8873fd8ddacd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5
Kerberos support should be installed.
If unsure, say Y.
-
-config RPCSEC_GSS_SPKM3
- tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
- select SUNRPC_GSS
- select CRYPTO
- select CRYPTO_MD5
- select CRYPTO_DES
- select CRYPTO_CAST5
- select CRYPTO_CBC
- help
- Choose Y here to enable Secure RPC using the SPKM3 public key
- GSS-API mechanism (RFC 2025).
-
- Secure RPC calls with SPKM3 require an auxiliary userspace
- daemon which may be found in the Linux nfs-utils package
- available from http://linux-nfs.org/.
-
- If unsure, say N.
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index e9eaaf7d43c1..afe67849269f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -595,7 +595,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int
rpcauth_refreshcred(struct rpc_task *task)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ struct rpc_cred *cred;
int err;
cred = task->tk_rqstp->rq_cred;
@@ -658,7 +658,7 @@ out1:
return err;
}
-void __exit rpcauth_remove_module(void)
+void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
return rpcauth_init_credcache(&generic_auth);
}
-void __exit rpc_destroy_generic_auth(void)
+void rpc_destroy_generic_auth(void)
{
rpcauth_destroy_credcache(&generic_auth);
}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..7350d86a32ee 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
-
-obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
-
-rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
- gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..f375decc024b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
context_derive_keys_rc4(struct krb5_ctx *ctx)
{
struct crypto_hash *hmac;
- char sigkeyconstant[] = "signaturekey";
+ static const char sigkeyconstant[] = "signaturekey";
int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
struct hash_desc desc;
struct scatterlist sg[1];
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * linux/net/sunrpc/gss_spkm3_mech.c
- *
- * Copyright (c) 2003 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- * J. Bruce Fields <bfields@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/sunrpc/auth.h>
-#include <linux/in.h>
-#include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
- const void *q;
- unsigned int len;
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- res->len = len;
- if (len == 0) {
- res->data = NULL;
- return p;
- }
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- res->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(res->data == NULL))
- return ERR_PTR(-ENOMEM);
- return q;
-}
-
-static int
-gss_import_sec_context_spkm3(const void *p, size_t len,
- struct gss_ctx *ctx_id,
- gfp_t gfp_mask)
-{
- const void *end = (const void *)((const char *)p + len);
- struct spkm3_ctx *ctx;
- int version;
-
- if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
- goto out_err;
-
- p = simple_get_bytes(p, end, &version, sizeof(version));
- if (IS_ERR(p))
- goto out_err_free_ctx;
- if (version != 1) {
- dprintk("RPC: unknown spkm3 token format: "
- "obsolete nfs-utils?\n");
- p = ERR_PTR(-EINVAL);
- goto out_err_free_ctx;
- }
-
- p = simple_get_netobj(p, end, &ctx->ctx_id);
- if (IS_ERR(p))
- goto out_err_free_ctx;
-
- p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
- if (IS_ERR(p))
- goto out_err_free_ctx_id;
-
- p = simple_get_netobj(p, end, &ctx->mech_used);
- if (IS_ERR(p))
- goto out_err_free_ctx_id;
-
- p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
- if (IS_ERR(p))
- goto out_err_free_mech;
-
- p = simple_get_netobj(p, end, &ctx->conf_alg);
- if (IS_ERR(p))
- goto out_err_free_mech;
-
- p = simple_get_netobj(p, end, &ctx->derived_conf_key);
- if (IS_ERR(p))
- goto out_err_free_conf_alg;
-
- p = simple_get_netobj(p, end, &ctx->intg_alg);
- if (IS_ERR(p))
- goto out_err_free_conf_key;
-
- p = simple_get_netobj(p, end, &ctx->derived_integ_key);
- if (IS_ERR(p))
- goto out_err_free_intg_alg;
-
- if (p != end) {
- p = ERR_PTR(-EFAULT);
- goto out_err_free_intg_key;
- }
-
- ctx_id->internal_ctx_id = ctx;
-
- dprintk("RPC: Successfully imported new spkm context.\n");
- return 0;
-
-out_err_free_intg_key:
- kfree(ctx->derived_integ_key.data);
-out_err_free_intg_alg:
- kfree(ctx->intg_alg.data);
-out_err_free_conf_key:
- kfree(ctx->derived_conf_key.data);
-out_err_free_conf_alg:
- kfree(ctx->conf_alg.data);
-out_err_free_mech:
- kfree(ctx->mech_used.data);
-out_err_free_ctx_id:
- kfree(ctx->ctx_id.data);
-out_err_free_ctx:
- kfree(ctx);
-out_err:
- return PTR_ERR(p);
-}
-
-static void
-gss_delete_sec_context_spkm3(void *internal_ctx)
-{
- struct spkm3_ctx *sctx = internal_ctx;
-
- kfree(sctx->derived_integ_key.data);
- kfree(sctx->intg_alg.data);
- kfree(sctx->derived_conf_key.data);
- kfree(sctx->conf_alg.data);
- kfree(sctx->mech_used.data);
- kfree(sctx->ctx_id.data);
- kfree(sctx);
-}
-
-static u32
-gss_verify_mic_spkm3(struct gss_ctx *ctx,
- struct xdr_buf *signbuf,
- struct xdr_netobj *checksum)
-{
- u32 maj_stat = 0;
- struct spkm3_ctx *sctx = ctx->internal_ctx_id;
-
- maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
-
- dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
- return maj_stat;
-}
-
-static u32
-gss_get_mic_spkm3(struct gss_ctx *ctx,
- struct xdr_buf *message_buffer,
- struct xdr_netobj *message_token)
-{
- u32 err = 0;
- struct spkm3_ctx *sctx = ctx->internal_ctx_id;
-
- err = spkm3_make_token(sctx, message_buffer,
- message_token, SPKM_MIC_TOK);
- dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
- return err;
-}
-
-static const struct gss_api_ops gss_spkm3_ops = {
- .gss_import_sec_context = gss_import_sec_context_spkm3,
- .gss_get_mic = gss_get_mic_spkm3,
- .gss_verify_mic = gss_verify_mic_spkm3,
- .gss_delete_sec_context = gss_delete_sec_context_spkm3,
-};
-
-static struct pf_desc gss_spkm3_pfs[] = {
- {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
- {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
-};
-
-static struct gss_api_mech gss_spkm3_mech = {
- .gm_name = "spkm3",
- .gm_owner = THIS_MODULE,
- .gm_oid = {7, "\053\006\001\005\005\001\003"},
- .gm_ops = &gss_spkm3_ops,
- .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
- .gm_pfs = gss_spkm3_pfs,
-};
-
-static int __init init_spkm3_module(void)
-{
- int status;
-
- status = gss_mech_register(&gss_spkm3_mech);
- if (status)
- printk("Failed to register spkm3 gss mechanism!\n");
- return status;
-}
-
-static void __exit cleanup_spkm3_module(void)
-{
- gss_mech_unregister(&gss_spkm3_mech);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_spkm3_module);
-module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * linux/net/sunrpc/gss_spkm3_seal.c
- *
- * Copyright (c) 2003 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/random.h>
-#include <linux/crypto.h>
-#include <linux/pagemap.h>
-#include <linux/scatterlist.h>
-#include <linux/sunrpc/xdr.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
-const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
-
-/*
- * spkm3_make_token()
- *
- * Only SPKM_MIC_TOK with md5 intg-alg is supported
- */
-
-u32
-spkm3_make_token(struct spkm3_ctx *ctx,
- struct xdr_buf * text, struct xdr_netobj * token,
- int toktype)
-{
- s32 checksum_type;
- char tokhdrbuf[25];
- char cksumdata[16];
- struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
- struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
- int tokenlen = 0;
- unsigned char *ptr;
- s32 now;
- int ctxelen = 0, ctxzbit = 0;
- int md5elen = 0, md5zbit = 0;
-
- now = jiffies;
-
- if (ctx->ctx_id.len != 16) {
- dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
- ctx->ctx_id.len);
- goto out_err;
- }
-
- if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
- dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
- "algorithm. only support hmac-md5 I-ALG.\n");
- goto out_err;
- } else
- checksum_type = CKSUMTYPE_HMAC_MD5;
-
- if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
- dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
- "algorithm\n");
- goto out_err;
- }
-
- if (toktype == SPKM_MIC_TOK) {
- /* Calculate checksum over the mic-header */
- asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
- spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
- ctxelen, ctxzbit);
- if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
- (char *)mic_hdr.data, mic_hdr.len,
- text, 0, &md5cksum))
- goto out_err;
-
- asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
- tokenlen = 10 + ctxelen + 1 + md5elen + 1;
-
- /* Create token header using generic routines */
- token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
-
- ptr = token->data;
- g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
-
- spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
- } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
- dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
- "not supported\n");
- goto out_err;
- }
-
- /* XXX need to implement sequence numbers, and ctx->expired */
-
- return GSS_S_COMPLETE;
-out_err:
- token->data = NULL;
- token->len = 0;
- return GSS_S_FAILURE;
-}
-
-static int
-spkm3_checksummer(struct scatterlist *sg, void *data)
-{
- struct hash_desc *desc = data;
-
- return crypto_hash_update(desc, sg, sg->length);
-}
-
-/* checksum the plaintext data and hdrlen bytes of the token header */
-s32
-make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
- unsigned int hdrlen, struct xdr_buf *body,
- unsigned int body_offset, struct xdr_netobj *cksum)
-{
- char *cksumname;
- struct hash_desc desc; /* XXX add to ctx? */
- struct scatterlist sg[1];
- int err;
-
- switch (cksumtype) {
- case CKSUMTYPE_HMAC_MD5:
- cksumname = "hmac(md5)";
- break;
- default:
- dprintk("RPC: spkm3_make_checksum:"
- " unsupported checksum %d", cksumtype);
- return GSS_S_FAILURE;
- }
-
- if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
-
- desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm))
- return GSS_S_FAILURE;
- cksum->len = crypto_hash_digestsize(desc.tfm);
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
- err = crypto_hash_setkey(desc.tfm, key->data, key->len);
- if (err)
- goto out;
-
- err = crypto_hash_init(&desc);
- if (err)
- goto out;
-
- sg_init_one(sg, header, hdrlen);
- crypto_hash_update(&desc, sg, sg->length);
-
- xdr_process_buf(body, body_offset, body->len - body_offset,
- spkm3_checksummer, &desc);
- crypto_hash_final(&desc, cksum->data);
-
-out:
- crypto_free_hash(desc.tfm);
-
- return err ? GSS_S_FAILURE : 0;
-}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * linux/net/sunrpc/gss_spkm3_token.c
- *
- * Copyright (c) 2003 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/random.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-/*
- * asn1_bitstring_len()
- *
- * calculate the asn1 bitstring length of the xdr_netobject
- */
-void
-asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
-{
- int i, zbit = 0,elen = in->len;
- char *ptr;
-
- ptr = &in->data[in->len -1];
-
- /* count trailing 0's */
- for(i = in->len; i > 0; i--) {
- if (*ptr == 0) {
- ptr--;
- elen--;
- } else
- break;
- }
-
- /* count number of 0 bits in final octet */
- ptr = &in->data[elen - 1];
- for(i = 0; i < 8; i++) {
- short mask = 0x01;
-
- if (!((mask << i) & *ptr))
- zbit++;
- else
- break;
- }
- *enclen = elen;
- *zerobits = zbit;
-}
-
-/*
- * decode_asn1_bitstring()
- *
- * decode a bitstring into a buffer of the expected length.
- * enclen = bit string length
- * explen = expected length (define in rfc)
- */
-int
-decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
-{
- if (!(out->data = kzalloc(explen,GFP_NOFS)))
- return 0;
- out->len = explen;
- memcpy(out->data, in, enclen);
- return 1;
-}
-
-/*
- * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
- *
- * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
- *
- * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
- *
- * pos value
- * ----------
- * [0] a4 SPKM-MIC tag
- * [1] ?? innertoken length (max 44)
- *
- *
- * tok_hdr piece of checksum data starts here
- *
- * the maximum mic-header len = 9 + 17 = 26
- * mic-header
- * ----------
- * [2] 30 SEQUENCE tag
- * [3] ?? mic-header length: (max 23) = TokenID + ContextID
- *
- * TokenID - all fields constant and can be hardcoded
- * -------
- * [4] 02 Type 2
- * [5] 02 Length 2
- * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
- *
- * ContextID - encoded length not constant, calculated
- * ---------
- * [8] 03 Type 3
- * [9] ?? encoded length
- * [10] ?? ctxzbit
- * [11] contextid
- *
- * mic_header piece of checksum data ends here.
- *
- * int-cksum - encoded length not constant, calculated
- * ---------
- * [??] 03 Type 3
- * [??] ?? encoded length
- * [??] ?? md5zbit
- * [??] int-cksum (NID_md5 = 16)
- *
- * maximum SPKM-MIC innercontext token length =
- * 10 + encoded contextid_size(17 max) + 2 + encoded
- * cksum_size (17 maxfor NID_md5) = 46
- */
-
-/*
- * spkm3_mic_header()
- *
- * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
- * elen: 16 byte context id asn1 bitstring encoded length
- */
-void
-spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
-{
- char *hptr = *hdrbuf;
- char *top = *hdrbuf;
-
- *(u8 *)hptr++ = 0x30;
- *(u8 *)hptr++ = elen + 7; /* on the wire header length */
-
- /* tokenid */
- *(u8 *)hptr++ = 0x02;
- *(u8 *)hptr++ = 0x02;
- *(u8 *)hptr++ = 0x01;
- *(u8 *)hptr++ = 0x01;
-
- /* coniextid */
- *(u8 *)hptr++ = 0x03;
- *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
- *(u8 *)hptr++ = zbit;
- memcpy(hptr, ctxdata, elen);
- hptr += elen;
- *hdrlen = hptr - top;
-}
-
-/*
- * spkm3_mic_innercontext_token()
- *
- * *tokp points to the beginning of the SPKM_MIC token described
- * in rfc 2025, section 3.2.1:
- *
- * toklen is the inner token length
- */
-void
-spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
-{
- unsigned char *ict = *tokp;
-
- *(u8 *)ict++ = 0xa4;
- *(u8 *)ict++ = toklen;
- memcpy(ict, mic_hdr->data, mic_hdr->len);
- ict += mic_hdr->len;
-
- *(u8 *)ict++ = 0x03;
- *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
- *(u8 *)ict++ = md5zbit;
- memcpy(ict, md5cksum->data, md5elen);
-}
-
-u32
-spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
-{
- struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
- unsigned char *ptr = *tokp;
- int ctxelen;
- u32 ret = GSS_S_DEFECTIVE_TOKEN;
-
- /* spkm3 innercontext token preamble */
- if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
- dprintk("RPC: BAD SPKM ictoken preamble\n");
- goto out;
- }
-
- *mic_hdrlen = ptr[3];
-
- /* token type */
- if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
- dprintk("RPC: BAD asn1 SPKM3 token type\n");
- goto out;
- }
-
- /* only support SPKM_MIC_TOK */
- if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
- dprintk("RPC: ERROR unsupported SPKM3 token\n");
- goto out;
- }
-
- /* contextid */
- if (ptr[8] != 0x03) {
- dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
- goto out;
- }
-
- ctxelen = ptr[9];
- if (ctxelen > 17) { /* length includes asn1 zbit octet */
- dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
- goto out;
- }
-
- /* ignore ptr[10] */
-
- if(!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
- goto out;
-
- /*
- * in the current implementation: the optional int-alg is not present
- * so the default int-alg (md5) is used the optional snd-seq field is
- * also not present
- */
-
- if (*mic_hdrlen != 6 + ctxelen) {
- dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only "
- "support default int-alg (should be absent) "
- "and do not support snd-seq\n", *mic_hdrlen);
- goto out;
- }
- /* checksum */
- *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
-
- ret = GSS_S_COMPLETE;
-out:
- kfree(spkm3_ctx_id.data);
- return ret;
-}
-
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * linux/net/sunrpc/gss_spkm3_unseal.c
- *
- * Copyright (c) 2003 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/sunrpc/gss_spkm3.h>
-#include <linux/crypto.h>
-
-#ifdef RPC_DEBUG
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-/*
- * spkm3_read_token()
- *
- * only SPKM_MIC_TOK with md5 intg-alg is supported
- */
-u32
-spkm3_read_token(struct spkm3_ctx *ctx,
- struct xdr_netobj *read_token, /* checksum */
- struct xdr_buf *message_buffer, /* signbuf */
- int toktype)
-{
- s32 checksum_type;
- s32 code;
- struct xdr_netobj wire_cksum = {.len =0, .data = NULL};
- char cksumdata[16];
- struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
- unsigned char *ptr = (unsigned char *)read_token->data;
- unsigned char *cksum;
- int bodysize, md5elen;
- int mic_hdrlen;
- u32 ret = GSS_S_DEFECTIVE_TOKEN;
-
- if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
- &bodysize, &ptr, read_token->len))
- goto out;
-
- /* decode the token */
-
- if (toktype != SPKM_MIC_TOK) {
- dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
- goto out;
- }
-
- if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
- goto out;
-
- if (*cksum++ != 0x03) {
- dprintk("RPC: spkm3_read_token BAD checksum type\n");
- goto out;
- }
- md5elen = *cksum++;
- cksum++; /* move past the zbit */
-
- if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
- goto out;
-
- /* HARD CODED FOR MD5 */
-
- /* compute the checksum of the message.
- * ptr + 2 = start of header piece of checksum
- * mic_hdrlen + 2 = length of header piece of checksum
- */
- ret = GSS_S_DEFECTIVE_TOKEN;
- if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
- dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
- "algorithm\n");
- goto out;
- }
-
- checksum_type = CKSUMTYPE_HMAC_MD5;
-
- code = make_spkm3_checksum(checksum_type,
- &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
- message_buffer, 0, &md5cksum);
-
- if (code)
- goto out;
-
- ret = GSS_S_BAD_SIG;
- code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
- if (code) {
- dprintk("RPC: bad MIC checksum\n");
- goto out;
- }
-
-
- /* XXX: need to add expiration and sequencing */
- ret = GSS_S_COMPLETE;
-out:
- kfree(wire_cksum.data);
- return ret;
-}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..dec2a6fc7c12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
if (rqstp->rq_gssclient == NULL)
return SVC_DENIED;
stat = svcauth_unix_set_client(rqstp);
- if (stat == SVC_DROP)
+ if (stat == SVC_DROP || stat == SVC_CLOSE)
return stat;
return SVC_OK;
}
@@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
return SVC_DENIED;
memset(&rsikey, 0, sizeof(rsikey));
if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
- return SVC_DROP;
+ return SVC_CLOSE;
*authp = rpc_autherr_badverf;
if (svc_safe_getnetobj(argv, &tmpobj)) {
kfree(rsikey.in_handle.data);
@@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
}
if (dup_netobj(&rsikey.in_token, &tmpobj)) {
kfree(rsikey.in_handle.data);
- return SVC_DROP;
+ return SVC_CLOSE;
}
/* Perform upcall, or find upcall result: */
rsip = rsi_lookup(&rsikey);
rsi_free(&rsikey);
if (!rsip)
- return SVC_DROP;
- switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
- case -EAGAIN:
- case -ETIMEDOUT:
- case -ENOENT:
+ return SVC_CLOSE;
+ if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
/* No upcall result: */
- return SVC_DROP;
- case 0:
- ret = SVC_DROP;
- /* Got an answer to the upcall; use it: */
- if (gss_write_init_verf(rqstp, rsip))
- goto out;
- if (resv->iov_len + 4 > PAGE_SIZE)
- goto out;
- svc_putnl(resv, RPC_SUCCESS);
- if (svc_safe_putnetobj(resv, &rsip->out_handle))
- goto out;
- if (resv->iov_len + 3 * 4 > PAGE_SIZE)
- goto out;
- svc_putnl(resv, rsip->major_status);
- svc_putnl(resv, rsip->minor_status);
- svc_putnl(resv, GSS_SEQ_WIN);
- if (svc_safe_putnetobj(resv, &rsip->out_token))
- goto out;
- }
+ return SVC_CLOSE;
+
+ ret = SVC_CLOSE;
+ /* Got an answer to the upcall; use it: */
+ if (gss_write_init_verf(rqstp, rsip))
+ goto out;
+ if (resv->iov_len + 4 > PAGE_SIZE)
+ goto out;
+ svc_putnl(resv, RPC_SUCCESS);
+ if (svc_safe_putnetobj(resv, &rsip->out_handle))
+ goto out;
+ if (resv->iov_len + 3 * 4 > PAGE_SIZE)
+ goto out;
+ svc_putnl(resv, rsip->major_status);
+ svc_putnl(resv, rsip->minor_status);
+ svc_putnl(resv, GSS_SEQ_WIN);
+ if (svc_safe_putnetobj(resv, &rsip->out_token))
+ goto out;
+
ret = SVC_COMPLETE;
out:
cache_put(&rsip->h, &rsi_cache);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7dce81a926c5..e433e7580e27 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,15 +33,16 @@
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_CACHE
-static int cache_defer_req(struct cache_req *req, struct cache_head *item);
+static void cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);
static void cache_init(struct cache_head *h)
{
- time_t now = get_seconds();
+ time_t now = seconds_since_boot();
h->next = NULL;
h->flags = 0;
kref_init(&h->ref);
@@ -51,7 +52,7 @@ static void cache_init(struct cache_head *h)
static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
{
- return (h->expiry_time < get_seconds()) ||
+ return (h->expiry_time < seconds_since_boot()) ||
(detail->flush_time > h->last_refresh);
}
@@ -126,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
static void cache_fresh_locked(struct cache_head *head, time_t expiry)
{
head->expiry_time = expiry;
- head->last_refresh = get_seconds();
+ head->last_refresh = seconds_since_boot();
set_bit(CACHE_VALID, &head->flags);
}
@@ -237,7 +238,7 @@ int cache_check(struct cache_detail *detail,
/* now see if we want to start an upcall */
refresh_age = (h->expiry_time - h->last_refresh);
- age = get_seconds() - h->last_refresh;
+ age = seconds_since_boot() - h->last_refresh;
if (rqstp == NULL) {
if (rv == -EAGAIN)
@@ -252,7 +253,7 @@ int cache_check(struct cache_detail *detail,
cache_revisit_request(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
- cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
+ cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
cache_fresh_unlocked(h, detail);
rv = -ENOENT;
}
@@ -267,7 +268,8 @@ int cache_check(struct cache_detail *detail,
}
if (rv == -EAGAIN) {
- if (cache_defer_req(rqstp, h) < 0) {
+ cache_defer_req(rqstp, h);
+ if (!test_bit(CACHE_PENDING, &h->flags)) {
/* Request is not deferred */
rv = cache_is_valid(detail, h);
if (rv == -EAGAIN)
@@ -387,11 +389,11 @@ static int cache_clean(void)
return -1;
}
current_detail = list_entry(next, struct cache_detail, others);
- if (current_detail->nextcheck > get_seconds())
+ if (current_detail->nextcheck > seconds_since_boot())
current_index = current_detail->hash_size;
else {
current_index = 0;
- current_detail->nextcheck = get_seconds()+30*60;
+ current_detail->nextcheck = seconds_since_boot()+30*60;
}
}
@@ -476,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
detail->flush_time = LONG_MAX;
- detail->nextcheck = get_seconds();
+ detail->nextcheck = seconds_since_boot();
cache_flush();
detail->flush_time = 1;
}
@@ -505,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge);
static DEFINE_SPINLOCK(cache_defer_lock);
static LIST_HEAD(cache_defer_list);
-static struct list_head cache_defer_hash[DFR_HASHSIZE];
+static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
static int cache_defer_cnt;
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void __unhash_deferred_req(struct cache_deferred_req *dreq)
+{
+ hlist_del_init(&dreq->hash);
+ if (!list_empty(&dreq->recent)) {
+ list_del_init(&dreq->recent);
+ cache_defer_cnt--;
+ }
+}
+
+static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
{
- struct cache_deferred_req *dreq, *discard;
int hash = DFR_HASH(item);
- if (cache_defer_cnt >= DFR_MAX) {
- /* too much in the cache, randomly drop this one,
- * or continue and drop the oldest below
- */
- if (net_random()&1)
- return -ENOMEM;
- }
- dreq = req->defer(req);
- if (dreq == NULL)
- return -ENOMEM;
+ INIT_LIST_HEAD(&dreq->recent);
+ hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
+}
+
+static void setup_deferral(struct cache_deferred_req *dreq,
+ struct cache_head *item,
+ int count_me)
+{
dreq->item = item;
spin_lock(&cache_defer_lock);
- list_add(&dreq->recent, &cache_defer_list);
-
- if (cache_defer_hash[hash].next == NULL)
- INIT_LIST_HEAD(&cache_defer_hash[hash]);
- list_add(&dreq->hash, &cache_defer_hash[hash]);
+ __hash_deferred_req(dreq, item);
- /* it is in, now maybe clean up */
- discard = NULL;
- if (++cache_defer_cnt > DFR_MAX) {
- discard = list_entry(cache_defer_list.prev,
- struct cache_deferred_req, recent);
- list_del_init(&discard->recent);
- list_del_init(&discard->hash);
- cache_defer_cnt--;
+ if (count_me) {
+ cache_defer_cnt++;
+ list_add(&dreq->recent, &cache_defer_list);
}
+
spin_unlock(&cache_defer_lock);
+}
+
+struct thread_deferred_req {
+ struct cache_deferred_req handle;
+ struct completion completion;
+};
+
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+ struct thread_deferred_req *dr =
+ container_of(dreq, struct thread_deferred_req, handle);
+ complete(&dr->completion);
+}
+
+static void cache_wait_req(struct cache_req *req, struct cache_head *item)
+{
+ struct thread_deferred_req sleeper;
+ struct cache_deferred_req *dreq = &sleeper.handle;
+
+ sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
+ dreq->revisit = cache_restart_thread;
+
+ setup_deferral(dreq, item, 0);
+
+ if (!test_bit(CACHE_PENDING, &item->flags) ||
+ wait_for_completion_interruptible_timeout(
+ &sleeper.completion, req->thread_wait) <= 0) {
+ /* The completion wasn't completed, so we need
+ * to clean up
+ */
+ spin_lock(&cache_defer_lock);
+ if (!hlist_unhashed(&sleeper.handle.hash)) {
+ __unhash_deferred_req(&sleeper.handle);
+ spin_unlock(&cache_defer_lock);
+ } else {
+ /* cache_revisit_request already removed
+ * this from the hash table, but hasn't
+ * called ->revisit yet. It will very soon
+ * and we need to wait for it.
+ */
+ spin_unlock(&cache_defer_lock);
+ wait_for_completion(&sleeper.completion);
+ }
+ }
+}
+
+static void cache_limit_defers(void)
+{
+ /* Make sure we haven't exceed the limit of allowed deferred
+ * requests.
+ */
+ struct cache_deferred_req *discard = NULL;
+
+ if (cache_defer_cnt <= DFR_MAX)
+ return;
+
+ spin_lock(&cache_defer_lock);
+
+ /* Consider removing either the first or the last */
+ if (cache_defer_cnt > DFR_MAX) {
+ if (net_random() & 1)
+ discard = list_entry(cache_defer_list.next,
+ struct cache_deferred_req, recent);
+ else
+ discard = list_entry(cache_defer_list.prev,
+ struct cache_deferred_req, recent);
+ __unhash_deferred_req(discard);
+ }
+ spin_unlock(&cache_defer_lock);
if (discard)
- /* there was one too many */
discard->revisit(discard, 1);
+}
- if (!test_bit(CACHE_PENDING, &item->flags)) {
- /* must have just been validated... */
- cache_revisit_request(item);
- return -EAGAIN;
+static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+ struct cache_deferred_req *dreq;
+
+ if (req->thread_wait) {
+ cache_wait_req(req, item);
+ if (!test_bit(CACHE_PENDING, &item->flags))
+ return;
}
- return 0;
+ dreq = req->defer(req);
+ if (dreq == NULL)
+ return;
+ setup_deferral(dreq, item, 1);
+ if (!test_bit(CACHE_PENDING, &item->flags))
+ /* Bit could have been cleared before we managed to
+ * set up the deferral, so need to revisit just in case
+ */
+ cache_revisit_request(item);
+
+ cache_limit_defers();
}
static void cache_revisit_request(struct cache_head *item)
{
struct cache_deferred_req *dreq;
struct list_head pending;
-
- struct list_head *lp;
+ struct hlist_node *lp, *tmp;
int hash = DFR_HASH(item);
INIT_LIST_HEAD(&pending);
spin_lock(&cache_defer_lock);
- lp = cache_defer_hash[hash].next;
- if (lp) {
- while (lp != &cache_defer_hash[hash]) {
- dreq = list_entry(lp, struct cache_deferred_req, hash);
- lp = lp->next;
- if (dreq->item == item) {
- list_del_init(&dreq->hash);
- list_move(&dreq->recent, &pending);
- cache_defer_cnt--;
- }
+ hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
+ if (dreq->item == item) {
+ __unhash_deferred_req(dreq);
+ list_add(&dreq->recent, &pending);
}
- }
+
spin_unlock(&cache_defer_lock);
while (!list_empty(&pending)) {
@@ -600,9 +676,8 @@ void cache_clean_deferred(void *owner)
list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
if (dreq->owner == owner) {
- list_del_init(&dreq->hash);
- list_move(&dreq->recent, &pending);
- cache_defer_cnt--;
+ __unhash_deferred_req(dreq);
+ list_add(&dreq->recent, &pending);
}
}
spin_unlock(&cache_defer_lock);
@@ -901,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp,
filp->private_data = NULL;
kfree(rp);
- cd->last_close = get_seconds();
+ cd->last_close = seconds_since_boot();
atomic_dec(&cd->readers);
}
module_put(cd->owner);
@@ -1014,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
}
}
+static bool cache_listeners_exist(struct cache_detail *detail)
+{
+ if (atomic_read(&detail->readers))
+ return true;
+ if (detail->last_close == 0)
+ /* This cache was never opened */
+ return false;
+ if (detail->last_close < seconds_since_boot() - 30)
+ /*
+ * We allow for the possibility that someone might
+ * restart a userspace daemon without restarting the
+ * server; but after 30 seconds, we give up.
+ */
+ return false;
+ return true;
+}
+
/*
* register an upcall request to user-space and queue it up for read() by the
* upcall daemon.
@@ -1032,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
char *bp;
int len;
- if (atomic_read(&detail->readers) == 0 &&
- detail->last_close < get_seconds() - 30) {
- warn_no_listener(detail);
- return -EINVAL;
+ if (!cache_listeners_exist(detail)) {
+ warn_no_listener(detail);
+ return -EINVAL;
}
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1094,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
- int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
- bp++;
- byte <<= 4;
- byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
- *dest++ = byte;
- bp++;
+ while (len < bufsize) {
+ int h, l;
+
+ h = hex_to_bin(bp[0]);
+ if (h < 0)
+ break;
+
+ l = hex_to_bin(bp[1]);
+ if (l < 0)
+ break;
+
+ *dest++ = (h << 4) | l;
+ bp += 2;
len++;
}
} else {
@@ -1218,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p)
ifdebug(CACHE)
seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
- cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags);
+ convert_to_wallclock(cp->expiry_time),
+ atomic_read(&cp->ref.refcount), cp->flags);
cache_get(cp);
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
@@ -1284,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
unsigned long p = *ppos;
size_t len;
- sprintf(tbuf, "%lu\n", cd->flush_time);
+ sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
len = strlen(tbuf);
if (p >= len)
return 0;
@@ -1302,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
struct cache_detail *cd)
{
char tbuf[20];
- char *ep;
- long flushtime;
+ char *bp, *ep;
+
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
if (copy_from_user(tbuf, buf, count))
return -EFAULT;
tbuf[count] = 0;
- flushtime = simple_strtoul(tbuf, &ep, 0);
+ simple_strtoul(tbuf, &ep, 0);
if (*ep && *ep != '\n')
return -EINVAL;
- cd->flush_time = flushtime;
- cd->nextcheck = get_seconds();
+ bp = tbuf;
+ cd->flush_time = get_expiry(&bp);
+ cd->nextcheck = seconds_since_boot();
cache_flush();
*ppos += count;
@@ -1438,8 +1537,10 @@ static const struct file_operations cache_flush_operations_procfs = {
.llseek = no_llseek,
};
-static void remove_cache_proc_entries(struct cache_detail *cd)
+static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
{
+ struct sunrpc_net *sn;
+
if (cd->u.procfs.proc_ent == NULL)
return;
if (cd->u.procfs.flush_ent)
@@ -1449,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
if (cd->u.procfs.content_ent)
remove_proc_entry("content", cd->u.procfs.proc_ent);
cd->u.procfs.proc_ent = NULL;
- remove_proc_entry(cd->name, proc_net_rpc);
+ sn = net_generic(net, sunrpc_net_id);
+ remove_proc_entry(cd->name, sn->proc_net_rpc);
}
#ifdef CONFIG_PROC_FS
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
{
struct proc_dir_entry *p;
+ struct sunrpc_net *sn;
- cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc);
+ sn = net_generic(net, sunrpc_net_id);
+ cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
if (cd->u.procfs.proc_ent == NULL)
goto out_nomem;
cd->u.procfs.channel_ent = NULL;
@@ -1488,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
}
return 0;
out_nomem:
- remove_cache_proc_entries(cd);
+ remove_cache_proc_entries(cd, net);
return -ENOMEM;
}
#else /* CONFIG_PROC_FS */
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
{
return 0;
}
@@ -1503,23 +1607,33 @@ void __init cache_initialize(void)
INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
}
-int cache_register(struct cache_detail *cd)
+int cache_register_net(struct cache_detail *cd, struct net *net)
{
int ret;
sunrpc_init_cache_detail(cd);
- ret = create_cache_proc_entries(cd);
+ ret = create_cache_proc_entries(cd, net);
if (ret)
sunrpc_destroy_cache_detail(cd);
return ret;
}
+
+int cache_register(struct cache_detail *cd)
+{
+ return cache_register_net(cd, &init_net);
+}
EXPORT_SYMBOL_GPL(cache_register);
-void cache_unregister(struct cache_detail *cd)
+void cache_unregister_net(struct cache_detail *cd, struct net *net)
{
- remove_cache_proc_entries(cd);
+ remove_cache_proc_entries(cd, net);
sunrpc_destroy_cache_detail(cd);
}
+
+void cache_unregister(struct cache_detail *cd)
+{
+ cache_unregister_net(cd, &init_net);
+}
EXPORT_SYMBOL_GPL(cache_unregister);
static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa5549079d79..9dab9573be41 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
struct xprt_create xprtargs = {
+ .net = args->net,
.ident = args->protocol,
.srcaddr = args->saddress,
.dstaddr = args->address,
@@ -1675,7 +1676,7 @@ rpc_verify_header(struct rpc_task *task)
rpcauth_invalcred(task);
/* Ensure we obtain a new XID! */
xprt_release(task);
- task->tk_action = call_refresh;
+ task->tk_action = call_reserve;
goto out_retry;
case RPC_AUTH_BADCRED:
case RPC_AUTH_BADVERF:
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
+#ifndef __SUNRPC_NETNS_H__
+#define __SUNRPC_NETNS_H__
+
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+struct cache_detail;
+
+struct sunrpc_net {
+ struct proc_dir_entry *proc_net_rpc;
+ struct cache_detail *ip_map_cache;
+};
+
+extern int sunrpc_net_id;
+
+int ip_map_cache_create(struct net *);
+void ip_map_cache_destroy(struct net *);
+
+#endif
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 52f252432144..10a17a37ec4e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -28,7 +28,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/cache.h>
-static struct vfsmount *rpc_mount __read_mostly;
+static struct vfsmount *rpc_mnt __read_mostly;
static int rpc_mount_count;
static struct file_system_type rpc_pipe_fs_type;
@@ -417,16 +417,16 @@ struct vfsmount *rpc_get_mount(void)
{
int err;
- err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count);
+ err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
if (err != 0)
return ERR_PTR(err);
- return rpc_mount;
+ return rpc_mnt;
}
EXPORT_SYMBOL_GPL(rpc_get_mount);
void rpc_put_mount(void)
{
- simple_release_fs(&rpc_mount, &rpc_mount_count);
+ simple_release_fs(&rpc_mnt, &rpc_mount_count);
}
EXPORT_SYMBOL_GPL(rpc_put_mount);
@@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
struct inode *inode = new_inode(sb);
if (!inode)
return NULL;
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch(mode & S_IFMT) {
@@ -1017,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static int
-rpc_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+rpc_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt);
+ return mount_single(fs_type, flags, data, rpc_fill_super);
}
static struct file_system_type rpc_pipe_fs_type = {
.owner = THIS_MODULE,
.name = "rpc_pipefs",
- .get_sb = rpc_get_sb,
+ .mount = rpc_mount,
.kill_sb = kill_litter_super,
};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index dac219a56ae1..fa6d7ca2c851 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
static int rpcb_create_local(void)
{
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = XPRT_TRANSPORT_TCP,
.address = (struct sockaddr *)&rpcb_inaddr_loopback,
.addrsize = sizeof(rpcb_inaddr_loopback),
@@ -211,8 +212,9 @@ static int rpcb_create_local(void)
*/
clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
if (IS_ERR(clnt4)) {
- dprintk("RPC: failed to create local rpcbind v4 "
- "cleint (errno %ld).\n", PTR_ERR(clnt4));
+ dprintk("RPC: failed to bind second program to "
+ "rpcbind v4 client (errno %ld).\n",
+ PTR_ERR(clnt4));
clnt4 = NULL;
}
@@ -228,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
size_t salen, int proto, u32 version)
{
struct rpc_create_args args = {
+ .net = &init_net,
.protocol = proto,
.address = srvaddr,
.addrsize = salen,
@@ -247,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
break;
default:
- return NULL;
+ return ERR_PTR(-EAFNOSUPPORT);
}
return rpc_create(&args);
@@ -475,57 +478,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
return -EAFNOSUPPORT;
}
-/**
- * rpcb_getport_sync - obtain the port for an RPC service on a given host
- * @sin: address of remote peer
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- *
- * Return value is the requested advertised port number,
- * or a negative errno value.
- *
- * Called from outside the RPC client in a synchronous task context.
- * Uses default timeout parameters specified by underlying transport.
- *
- * XXX: Needs to support IPv6
- */
-int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
-{
- struct rpcbind_args map = {
- .r_prog = prog,
- .r_vers = vers,
- .r_prot = prot,
- .r_port = 0,
- };
- struct rpc_message msg = {
- .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
- .rpc_argp = &map,
- .rpc_resp = &map,
- };
- struct rpc_clnt *rpcb_clnt;
- int status;
-
- dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
- __func__, &sin->sin_addr.s_addr, prog, vers, prot);
-
- rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
- sizeof(*sin), prot, RPCBVERS_2);
- if (IS_ERR(rpcb_clnt))
- return PTR_ERR(rpcb_clnt);
-
- status = rpc_call_sync(rpcb_clnt, &msg, 0);
- rpc_shutdown_client(rpcb_clnt);
-
- if (status >= 0) {
- if (map.r_port != 0)
- return map.r_port;
- status = -EACCES;
- }
- return status;
-}
-EXPORT_SYMBOL_GPL(rpcb_getport_sync);
-
static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
{
struct rpc_message msg = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aa5dbda6608c..243fc09b164e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -908,7 +908,7 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = create_workqueue("rpciod");
+ wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..f71a73107ae9 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/metrics.h>
-#include <net/net_namespace.h>
-#define RPCDBG_FACILITY RPCDBG_MISC
+#include "netns.h"
-struct proc_dir_entry *proc_net_rpc = NULL;
+#define RPCDBG_FACILITY RPCDBG_MISC
/*
* Get RPC client stats
@@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
static inline struct proc_dir_entry *
do_register(const char *name, void *data, const struct file_operations *fops)
{
- rpc_proc_init();
- dprintk("RPC: registering /proc/net/rpc/%s\n", name);
+ struct sunrpc_net *sn;
- return proc_create_data(name, 0, proc_net_rpc, fops, data);
+ dprintk("RPC: registering /proc/net/rpc/%s\n", name);
+ sn = net_generic(&init_net, sunrpc_net_id);
+ return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
}
struct proc_dir_entry *
@@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
void
rpc_proc_unregister(const char *name)
{
- remove_proc_entry(name, proc_net_rpc);
+ struct sunrpc_net *sn;
+
+ sn = net_generic(&init_net, sunrpc_net_id);
+ remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(rpc_proc_unregister);
@@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
void
svc_proc_unregister(const char *name)
{
- remove_proc_entry(name, proc_net_rpc);
+ struct sunrpc_net *sn;
+
+ sn = net_generic(&init_net, sunrpc_net_id);
+ remove_proc_entry(name, sn->proc_net_rpc);
}
EXPORT_SYMBOL_GPL(svc_proc_unregister);
-void
-rpc_proc_init(void)
+int rpc_proc_init(struct net *net)
{
+ struct sunrpc_net *sn;
+
dprintk("RPC: registering /proc/net/rpc\n");
- if (!proc_net_rpc)
- proc_net_rpc = proc_mkdir("rpc", init_net.proc_net);
+ sn = net_generic(net, sunrpc_net_id);
+ sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
+ if (sn->proc_net_rpc == NULL)
+ return -ENOMEM;
+
+ return 0;
}
-void
-rpc_proc_exit(void)
+void rpc_proc_exit(struct net *net)
{
dprintk("RPC: unregistering /proc/net/rpc\n");
- if (proc_net_rpc) {
- proc_net_rpc = NULL;
- remove_proc_entry("rpc", init_net.proc_net);
- }
+ remove_proc_entry("rpc", net->proc_net);
}
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
-extern struct cache_detail ip_map_cache, unix_gid_cache;
+#include "netns.h"
+
+int sunrpc_net_id;
+
+static __net_init int sunrpc_init_net(struct net *net)
+{
+ int err;
+
+ err = rpc_proc_init(net);
+ if (err)
+ goto err_proc;
+
+ err = ip_map_cache_create(net);
+ if (err)
+ goto err_ipmap;
+
+ return 0;
+
+err_ipmap:
+ rpc_proc_exit(net);
+err_proc:
+ return err;
+}
+
+static __net_exit void sunrpc_exit_net(struct net *net)
+{
+ ip_map_cache_destroy(net);
+ rpc_proc_exit(net);
+}
+
+static struct pernet_operations sunrpc_net_ops = {
+ .init = sunrpc_init_net,
+ .exit = sunrpc_exit_net,
+ .id = &sunrpc_net_id,
+ .size = sizeof(struct sunrpc_net),
+};
+
+extern struct cache_detail unix_gid_cache;
extern void cleanup_rpcb_clnt(void);
@@ -38,18 +75,22 @@ init_sunrpc(void)
err = rpcauth_init_module();
if (err)
goto out3;
+
+ cache_initialize();
+
+ err = register_pernet_subsys(&sunrpc_net_ops);
+ if (err)
+ goto out4;
#ifdef RPC_DEBUG
rpc_register_sysctl();
#endif
-#ifdef CONFIG_PROC_FS
- rpc_proc_init();
-#endif
- cache_initialize();
- cache_register(&ip_map_cache);
cache_register(&unix_gid_cache);
svc_init_xprt_sock(); /* svc sock transport */
init_socket_xprt(); /* clnt sock transport */
return 0;
+
+out4:
+ rpcauth_remove_module();
out3:
rpc_destroy_mempool();
out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
svc_cleanup_xprt_sock();
unregister_rpc_pipefs();
rpc_destroy_mempool();
- cache_unregister(&ip_map_cache);
cache_unregister(&unix_gid_cache);
+ unregister_pernet_subsys(&sunrpc_net_ops);
#ifdef RPC_DEBUG
rpc_unregister_sysctl();
#endif
-#ifdef CONFIG_PROC_FS
- rpc_proc_exit();
-#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..6359c42c4941 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_bad;
case SVC_DENIED:
goto err_bad_auth;
+ case SVC_CLOSE:
+ if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+ svc_close_xprt(rqstp->rq_xprt);
case SVC_DROP:
goto dropit;
case SVC_COMPLETE:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..c82fe739fbdc 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
*/
int svc_print_xprts(char *buf, int maxlen)
{
- struct list_head *le;
+ struct svc_xprt_class *xcl;
char tmpstr[80];
int len = 0;
buf[0] = '\0';
spin_lock(&svc_xprt_class_lock);
- list_for_each(le, &svc_xprt_class_list) {
+ list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
int slen;
- struct svc_xprt_class *xcl =
- list_entry(le, struct svc_xprt_class, xcl_list);
sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
slen = strlen(tmpstr);
@@ -128,9 +126,9 @@ static void svc_xprt_free(struct kref *kref)
struct svc_xprt *xprt =
container_of(kref, struct svc_xprt, xpt_ref);
struct module *owner = xprt->xpt_class->xcl_owner;
- if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) &&
- xprt->xpt_auth_cache != NULL)
- svcauth_unix_info_release(xprt->xpt_auth_cache);
+ if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
+ svcauth_unix_info_release(xprt);
+ put_net(xprt->xpt_net);
xprt->xpt_ops->xpo_free(xprt);
module_put(owner);
}
@@ -156,15 +154,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
INIT_LIST_HEAD(&xprt->xpt_list);
INIT_LIST_HEAD(&xprt->xpt_ready);
INIT_LIST_HEAD(&xprt->xpt_deferred);
+ INIT_LIST_HEAD(&xprt->xpt_users);
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
+ xprt->xpt_net = get_net(&init_net);
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct svc_serv *serv,
+ struct net *net,
const int family,
const unsigned short port,
int flags)
@@ -199,12 +200,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return ERR_PTR(-EAFNOSUPPORT);
}
- return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
+ return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
}
int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
- const int family, const unsigned short port,
- int flags)
+ struct net *net, const int family,
+ const unsigned short port, int flags)
{
struct svc_xprt_class *xcl;
@@ -220,7 +221,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
+ newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
@@ -329,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
"svc_xprt_enqueue: "
"threads and transports both waiting??\n");
- if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
- /* Don't enqueue dead transports */
- dprintk("svc: transport %p is dead, not enqueued\n", xprt);
- goto out_unlock;
- }
-
pool->sp_stats.packets++;
/* Mark transport as busy. It will remain in this state until
@@ -651,6 +646,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (signalled() || kthread_should_stop())
return -EINTR;
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided.
+ */
+ rqstp->rq_chandle.thread_wait = 5*HZ;
+
spin_lock_bh(&pool->sp_lock);
xprt = svc_xprt_dequeue(pool);
if (xprt) {
@@ -658,6 +658,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
svc_xprt_get(xprt);
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+
+ /* As there is a shortage of threads and this request
+ * had to be queued, don't allow the thread to wait so
+ * long for cache updates.
+ */
+ rqstp->rq_chandle.thread_wait = 1*HZ;
} else {
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
@@ -868,6 +874,19 @@ static void svc_age_temp_xprts(unsigned long closure)
mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}
+static void call_xpt_users(struct svc_xprt *xprt)
+{
+ struct svc_xpt_user *u;
+
+ spin_lock(&xprt->xpt_lock);
+ while (!list_empty(&xprt->xpt_users)) {
+ u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
+ list_del(&u->list);
+ u->callback(u);
+ }
+ spin_unlock(&xprt->xpt_lock);
+}
+
/*
* Remove a dead transport
*/
@@ -878,7 +897,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
/* Only do this once */
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
- return;
+ BUG();
dprintk("svc: svc_delete_xprt(%p)\n", xprt);
xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +919,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
while ((dr = svc_deferred_dequeue(xprt)) != NULL)
kfree(dr);
+ call_xpt_users(xprt);
svc_xprt_put(xprt);
}
@@ -910,10 +930,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
/* someone else will have to effect the close */
return;
- svc_xprt_get(xprt);
svc_delete_xprt(xprt);
- clear_bit(XPT_BUSY, &xprt->xpt_flags);
- svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_close_xprt);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..560677d187f1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
#include <linux/sunrpc/clnt.h>
+#include "netns.h"
+
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
* AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -92,7 +94,6 @@ struct ip_map {
struct unix_domain *m_client;
int m_add_change;
};
-static struct cache_head *ip_table[IP_HASHMAX];
static void ip_map_put(struct kref *kref)
{
@@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
}
-static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr);
-static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
+static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
+static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
static int ip_map_parse(struct cache_detail *cd,
char *mesg, int mlen)
@@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd,
switch (address.sa.sa_family) {
case AF_INET:
/* Form a mapped IPv4 address in sin6 */
- memset(&sin6, 0, sizeof(sin6));
sin6.sin6_family = AF_INET6;
- sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
- sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr;
+ ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
+ &sin6.sin6_addr);
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
@@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd,
dom = NULL;
/* IPv6 scope IDs are ignored for now */
- ipmp = ip_map_lookup(class, &sin6.sin6_addr);
+ ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
if (ipmp) {
- err = ip_map_update(ipmp,
+ err = __ip_map_update(cd, ipmp,
container_of(dom, struct unix_domain, h),
expiry);
} else
@@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m,
}
-struct cache_detail ip_map_cache = {
- .owner = THIS_MODULE,
- .hash_size = IP_HASHMAX,
- .hash_table = ip_table,
- .name = "auth.unix.ip",
- .cache_put = ip_map_put,
- .cache_upcall = ip_map_upcall,
- .cache_parse = ip_map_parse,
- .cache_show = ip_map_show,
- .match = ip_map_match,
- .init = ip_map_init,
- .update = update,
- .alloc = ip_map_alloc,
-};
-
-static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
+static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
+ struct in6_addr *addr)
{
struct ip_map ip;
struct cache_head *ch;
strcpy(ip.m_class, class);
ipv6_addr_copy(&ip.m_addr, addr);
- ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
+ ch = sunrpc_cache_lookup(cd, &ip.h,
hash_str(class, IP_HASHBITS) ^
hash_ip6(*addr));
@@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
return NULL;
}
-static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry)
+static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
+ struct in6_addr *addr)
+{
+ struct sunrpc_net *sn;
+
+ sn = net_generic(net, sunrpc_net_id);
+ return __ip_map_lookup(sn->ip_map_cache, class, addr);
+}
+
+static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
+ struct unix_domain *udom, time_t expiry)
{
struct ip_map ip;
struct cache_head *ch;
@@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
ip.m_add_change++;
}
ip.h.expiry_time = expiry;
- ch = sunrpc_cache_update(&ip_map_cache,
- &ip.h, &ipm->h,
+ ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
hash_str(ipm->m_class, IP_HASHBITS) ^
hash_ip6(ipm->m_addr));
if (!ch)
return -ENOMEM;
- cache_put(ch, &ip_map_cache);
+ cache_put(ch, cd);
return 0;
}
-int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
+static inline int ip_map_update(struct net *net, struct ip_map *ipm,
+ struct unix_domain *udom, time_t expiry)
+{
+ struct sunrpc_net *sn;
+
+ sn = net_generic(net, sunrpc_net_id);
+ return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
+}
+
+int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
{
struct unix_domain *udom;
struct ip_map *ipmp;
@@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
if (dom->flavour != &svcauth_unix)
return -EINVAL;
udom = container_of(dom, struct unix_domain, h);
- ipmp = ip_map_lookup("nfsd", addr);
+ ipmp = ip_map_lookup(net, "nfsd", addr);
if (ipmp)
- return ip_map_update(ipmp, udom, NEVER);
+ return ip_map_update(net, ipmp, udom, NEVER);
else
return -ENOMEM;
}
@@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom)
}
EXPORT_SYMBOL_GPL(auth_unix_forget_old);
-struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
+struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
{
struct ip_map *ipm;
struct auth_domain *rv;
+ struct sunrpc_net *sn;
- ipm = ip_map_lookup("nfsd", addr);
+ sn = net_generic(net, sunrpc_net_id);
+ ipm = ip_map_lookup(net, "nfsd", addr);
if (!ipm)
return NULL;
- if (cache_check(&ip_map_cache, &ipm->h, NULL))
+ if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
return NULL;
if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
@@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
rv = &ipm->m_client->h;
kref_get(&rv->ref);
}
- cache_put(&ipm->h, &ip_map_cache);
+ cache_put(&ipm->h, sn->ip_map_cache);
return rv;
}
EXPORT_SYMBOL_GPL(auth_unix_lookup);
void svcauth_unix_purge(void)
{
- cache_purge(&ip_map_cache);
+ struct net *net;
+
+ for_each_net(net) {
+ struct sunrpc_net *sn;
+
+ sn = net_generic(net, sunrpc_net_id);
+ cache_purge(sn->ip_map_cache);
+ }
}
EXPORT_SYMBOL_GPL(svcauth_unix_purge);
static inline struct ip_map *
-ip_map_cached_get(struct svc_rqst *rqstp)
+ip_map_cached_get(struct svc_xprt *xprt)
{
struct ip_map *ipm = NULL;
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct sunrpc_net *sn;
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
@@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
* remembered, e.g. by a second mount from the
* same IP address.
*/
+ sn = net_generic(xprt->xpt_net, sunrpc_net_id);
xprt->xpt_auth_cache = NULL;
spin_unlock(&xprt->xpt_lock);
- cache_put(&ipm->h, &ip_map_cache);
+ cache_put(&ipm->h, sn->ip_map_cache);
return NULL;
}
cache_get(&ipm->h);
@@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
}
static inline void
-ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
+ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
{
- struct svc_xprt *xprt = rqstp->rq_xprt;
-
if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
spin_lock(&xprt->xpt_lock);
if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
}
spin_unlock(&xprt->xpt_lock);
}
- if (ipm)
- cache_put(&ipm->h, &ip_map_cache);
+ if (ipm) {
+ struct sunrpc_net *sn;
+
+ sn = net_generic(xprt->xpt_net, sunrpc_net_id);
+ cache_put(&ipm->h, sn->ip_map_cache);
+ }
}
void
-svcauth_unix_info_release(void *info)
+svcauth_unix_info_release(struct svc_xprt *xpt)
{
- struct ip_map *ipm = info;
- cache_put(&ipm->h, &ip_map_cache);
+ struct ip_map *ipm;
+
+ ipm = xpt->xpt_auth_cache;
+ if (ipm != NULL) {
+ struct sunrpc_net *sn;
+
+ sn = net_generic(xpt->xpt_net, sunrpc_net_id);
+ cache_put(&ipm->h, sn->ip_map_cache);
+ }
}
/****************************************************************************
@@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
switch (ret) {
case -ENOENT:
return ERR_PTR(-ENOENT);
+ case -ETIMEDOUT:
+ return ERR_PTR(-ESHUTDOWN);
case 0:
gi = get_group_info(ug->gi);
cache_put(&ug->h, &unix_gid_cache);
@@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
struct ip_map *ipm;
struct group_info *gi;
struct svc_cred *cred = &rqstp->rq_cred;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct net *net = xprt->xpt_net;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
switch (rqstp->rq_addr.ss_family) {
case AF_INET:
@@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
if (rqstp->rq_proc == 0)
return SVC_OK;
- ipm = ip_map_cached_get(rqstp);
+ ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
- ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
+ ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
return SVC_DENIED;
- switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
+ switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
default:
BUG();
- case -EAGAIN:
case -ETIMEDOUT:
+ return SVC_CLOSE;
+ case -EAGAIN:
return SVC_DROP;
case -ENOENT:
return SVC_DENIED;
case 0:
rqstp->rq_client = &ipm->m_client->h;
kref_get(&rqstp->rq_client->ref);
- ip_map_cached_put(rqstp, ipm);
+ ip_map_cached_put(xprt, ipm);
break;
}
@@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
switch (PTR_ERR(gi)) {
case -EAGAIN:
return SVC_DROP;
+ case -ESHUTDOWN:
+ return SVC_CLOSE;
case -ENOENT:
break;
default:
@@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
cred->cr_gid = (gid_t) -1;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
- return SVC_DROP; /* kmalloc failure - client must retry */
+ return SVC_CLOSE; /* kmalloc failure - client must retry */
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
- return SVC_DROP;
+ return SVC_CLOSE;
for (i = 0; i < slen; i++)
GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = {
.set_client = svcauth_unix_set_client,
};
+int ip_map_cache_create(struct net *net)
+{
+ int err = -ENOMEM;
+ struct cache_detail *cd;
+ struct cache_head **tbl;
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
+ if (cd == NULL)
+ goto err_cd;
+
+ tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_tbl;
+
+ cd->owner = THIS_MODULE,
+ cd->hash_size = IP_HASHMAX,
+ cd->hash_table = tbl,
+ cd->name = "auth.unix.ip",
+ cd->cache_put = ip_map_put,
+ cd->cache_upcall = ip_map_upcall,
+ cd->cache_parse = ip_map_parse,
+ cd->cache_show = ip_map_show,
+ cd->match = ip_map_match,
+ cd->init = ip_map_init,
+ cd->update = update,
+ cd->alloc = ip_map_alloc,
+
+ err = cache_register_net(cd, net);
+ if (err)
+ goto err_reg;
+
+ sn->ip_map_cache = cd;
+ return 0;
+
+err_reg:
+ kfree(tbl);
+err_tbl:
+ kfree(cd);
+err_cd:
+ return err;
+}
+
+void ip_map_cache_destroy(struct net *net)
+{
+ struct sunrpc_net *sn;
+
+ sn = net_generic(net, sunrpc_net_id);
+ cache_purge(sn->ip_map_cache);
+ cache_unregister_net(sn->ip_map_cache, net);
+ kfree(sn->ip_map_cache->hash_table);
+ kfree(sn->ip_map_cache);
+}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..07919e16be3e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
static void svc_sock_free(struct svc_xprt *);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
- struct sockaddr *, int, int);
+ struct net *, struct sockaddr *,
+ int, int);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];
@@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
}
static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
+ struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
- return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags);
+ return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
}
static struct svc_xprt_ops svc_udp_ops = {
@@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
reclen = htonl(0x80000000|((xbufp->len ) - 4));
memcpy(xbufp->head[0].iov_base, &reclen, 4);
- if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
- return -ENOTCONN;
-
sent = svc_sendto(rqstp, &rqstp->rq_res);
if (sent != xbufp->len) {
printk(KERN_NOTICE
@@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
}
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
+ struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
- return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
+ return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
static struct svc_xprt_ops svc_tcp_ops = {
@@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
* The number of server threads has changed. Update
* rcvbuf and sndbuf accordingly on all sockets
*/
- struct list_head *le;
+ struct svc_sock *svsk;
spin_lock_bh(&serv->sv_lock);
- list_for_each(le, &serv->sv_permsocks) {
- struct svc_sock *svsk =
- list_entry(le, struct svc_sock, sk_xprt.xpt_list);
+ list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
- }
- list_for_each(le, &serv->sv_tempsocks) {
- struct svc_sock *svsk =
- list_entry(le, struct svc_sock, sk_xprt.xpt_list);
+ list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
- }
spin_unlock_bh(&serv->sv_lock);
}
EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
@@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
*/
static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
int protocol,
+ struct net *net,
struct sockaddr *sin, int len,
int flags)
{
@@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
return ERR_PTR(-EINVAL);
}
- error = sock_create_kern(family, type, protocol, &sock);
+ error = __sock_create(net, family, type, protocol, &sock, 1);
if (error < 0)
return ERR_PTR(error);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index a1f82a87d34d..cd9e841e7492 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
}
EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
+/**
+ * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
+ * @buf: XDR buffer where string resides
+ * @len: length of string, in bytes
+ *
+ */
+void
+xdr_terminate_string(struct xdr_buf *buf, const u32 len)
+{
+ char *kaddr;
+
+ kaddr = kmap_atomic(buf->pages[0], KM_USER0);
+ kaddr[buf->page_base + len] = '\0';
+ kunmap_atomic(kaddr, KM_USER0);
+}
+EXPORT_SYMBOL(xdr_terminate_string);
+
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
unsigned int len)
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
{
struct kvec *tail;
size_t copy;
- char *p;
unsigned int pglen = buf->page_len;
+ unsigned int tailbuf_len;
tail = buf->tail;
BUG_ON (len > pglen);
+ tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
+
/* Shift the tail first */
- if (tail->iov_len != 0) {
- p = (char *)tail->iov_base + len;
+ if (tailbuf_len != 0) {
+ unsigned int free_space = tailbuf_len - tail->iov_len;
+
+ if (len < free_space)
+ free_space = len;
+ tail->iov_len += free_space;
+
+ copy = len;
if (tail->iov_len > len) {
- copy = tail->iov_len - len;
- memmove(p, tail->iov_base, copy);
+ char *p = (char *)tail->iov_base + len;
+ memmove(p, tail->iov_base, tail->iov_len - len);
} else
- buf->buflen -= len;
- /* Copy from the inlined pages into the tail */
- copy = len;
- if (copy > tail->iov_len)
copy = tail->iov_len;
+ /* Copy from the inlined pages into the tail */
_copy_from_pages((char *)tail->iov_base,
buf->pages, buf->page_base + pglen - len,
copy);
@@ -551,6 +573,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_decode);
/**
+ * xdr_inline_peek - Allow read-ahead in the XDR data stream
+ * @xdr: pointer to xdr_stream struct
+ * @nbytes: number of bytes of data to decode
+ *
+ * Check if the input buffer is long enough to enable us to decode
+ * 'nbytes' more bytes of data starting at the current position.
+ * If so return the current pointer without updating the current
+ * pointer position.
+ */
+__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p = xdr->p;
+ __be32 *q = p + XDR_QUADLEN(nbytes);
+
+ if (unlikely(q > xdr->end || q < p))
+ return NULL;
+ return p;
+}
+EXPORT_SYMBOL_GPL(xdr_inline_peek);
+
+/**
* xdr_inline_decode - Retrieve non-page XDR data to decode
* @xdr: pointer to xdr_stream struct
* @nbytes: number of bytes of data to decode
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..4c8f18aff7c3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task)
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
- if (task == NULL)
- return 0;
goto out_sleep;
}
xprt->snd_task = task;
@@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task)
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
- struct list_head *pos;
+ struct rpc_rqst *entry;
- list_for_each(pos, &xprt->recv) {
- struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
+ list_for_each_entry(entry, &xprt->recv, rq_list)
if (entry->rq_xid == xid)
return entry;
- }
dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
ntohl(xid));
@@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
spin_unlock(&xprt->reserve_lock);
}
+struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
+{
+ struct rpc_xprt *xprt;
+
+ xprt = kzalloc(size, GFP_KERNEL);
+ if (xprt == NULL)
+ goto out;
+
+ xprt->max_reqs = max_req;
+ xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
+ if (xprt->slot == NULL)
+ goto out_free;
+
+ xprt->xprt_net = get_net(net);
+ return xprt;
+
+out_free:
+ kfree(xprt);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xprt_alloc);
+
+void xprt_free(struct rpc_xprt *xprt)
+{
+ put_net(xprt->xprt_net);
+ kfree(xprt->slot);
+ kfree(xprt);
+}
+EXPORT_SYMBOL_GPL(xprt_free);
+
/**
* xprt_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
+#include <linux/workqueue.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
struct kmem_cache *svc_rdma_map_cachep;
struct kmem_cache *svc_rdma_ctxt_cachep;
+struct workqueue_struct *svc_rdma_wq;
+
/*
* This function implements reading and resetting an atomic_t stat
* variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
- flush_scheduled_work();
+ destroy_workqueue(svc_rdma_wq);
if (svcrdma_table_header) {
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
dprintk("\tsq_depth : %d\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
+
+ svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
+ if (!svc_rdma_wq)
+ return -ENOMEM;
+
if (!svcrdma_table_header)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
kmem_cache_destroy(svc_rdma_map_cachep);
err0:
unregister_sysctl_table(svcrdma_table_header);
+ destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(rqstp->rq_arg.pages[page_no]),
- PAGE_SIZE, DMA_FROM_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ rqstp->rq_arg.pages[page_no], 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
int count)
{
int i;
+ unsigned long off;
ctxt->count = count;
ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
ctxt->sge[i].length = 0; /* in case map fails */
if (!frmr) {
+ BUG_ON(0 == virt_to_page(vec[i].iov_base));
+ off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
ctxt->sge[i].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- vec[i].iov_base,
- vec[i].iov_len,
- DMA_FROM_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ virt_to_page(vec[i].iov_base),
+ off,
+ vec[i].iov_len,
+ DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
ctxt->sge[i].addr))
return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
goto out;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
* on extra page for the RPCRMDA header.
*/
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
vec->count = 2;
sge_no++;
- /* Build the FRMR */
+ /* Map the XDR head */
frmr->kva = frva;
frmr->direction = DMA_TO_DEVICE;
frmr->access_flags = 0;
frmr->map_len = PAGE_SIZE;
frmr->page_list_len = 1;
+ page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- (void *)xdr->head[0].iov_base,
- PAGE_SIZE, DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ virt_to_page(xdr->head[0].iov_base),
+ page_off,
+ PAGE_SIZE - page_off,
+ DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
atomic_inc(&xprt->sc_dma_used);
+ /* Map the XDR page list */
page_off = xdr->page_base;
page_bytes = xdr->page_len + page_off;
if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
page_bytes -= sge_bytes;
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(page),
- PAGE_SIZE, DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ page, page_off,
+ sge_bytes, DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
- DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
+ page_off,
+ PAGE_SIZE,
+ DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
return 0;
}
+static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ u32 xdr_off, size_t len, int dir)
+{
+ struct page *page;
+ dma_addr_t dma_addr;
+ if (xdr_off < xdr->head[0].iov_len) {
+ /* This offset is in the head */
+ xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
+ page = virt_to_page(xdr->head[0].iov_base);
+ } else {
+ xdr_off -= xdr->head[0].iov_len;
+ if (xdr_off < xdr->page_len) {
+ /* This offset is in the page list */
+ page = xdr->pages[xdr_off >> PAGE_SHIFT];
+ xdr_off &= ~PAGE_MASK;
+ } else {
+ /* This offset is in the tail */
+ xdr_off -= xdr->page_len;
+ xdr_off += (unsigned long)
+ xdr->tail[0].iov_base & ~PAGE_MASK;
+ page = virt_to_page(xdr->tail[0].iov_base);
+ }
+ }
+ dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
+ min_t(size_t, PAGE_SIZE, len), dir);
+ return dma_addr;
+}
+
/* Assumptions:
* - We are using FRMR
* - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge[sge_no].length = sge_bytes;
if (!vec->frmr) {
sge[sge_no].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- (void *)
- vec->sge[xdr_sge_no].iov_base + sge_off,
- sge_bytes, DMA_TO_DEVICE);
+ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
goto err;
return 0;
err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
* In all three cases, this function prepares the RPCRDMA header in
* sge[0], the 'type' parameter indicates the type to place in the
* RPCRDMA header, and the 'byte_count' field indicates how much of
- * the XDR to include in this RDMA_SEND.
+ * the XDR to include in this RDMA_SEND. NB: The offset of the payload
+ * to send is zero in the XDR.
*/
static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
- ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
- ctxt->sge[0].length, DMA_TO_DEVICE);
+ ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
+ ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->direction = DMA_TO_DEVICE;
- /* Determine how many of our SGE are to be transmitted */
+ /* Map the payload indicated by 'byte_count' */
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
+ int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
if (!vec->frmr) {
ctxt->sge[sge_no].addr =
- ib_dma_map_single(rdma->sc_cm_id->device,
- vec->sge[sge_no].iov_base,
- sge_bytes, DMA_TO_DEVICE);
+ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..9df1eadc912a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
+ struct net *net,
struct sockaddr *sa, int salen,
int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
/* WR context cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_ctxt_cachep;
+/* Workqueue created in svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
+
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
*/
if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
BUG_ON(sge_no >= xprt->sc_max_sge);
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
- pa = ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(page), PAGE_SIZE,
+ pa = ib_dma_map_page(xprt->sc_cm_id->device,
+ page, 0, PAGE_SIZE,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
- ctxt->count = sge_no;
recv_wr.next = NULL;
recv_wr.sg_list = &ctxt->sge[0];
recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
return ret;
err_put_ctxt:
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
return -ENOMEM;
}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
* Create a listening RDMA service endpoint.
*/
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
+ struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
@@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
if (ib_dma_mapping_error(frmr->mr->device, addr))
continue;
atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
- frmr->direction);
+ ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
+ frmr->direction);
}
}
@@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
INIT_WORK(&rdma->sc_work, __svc_rdma_free);
- schedule_work(&rdma->sc_work);
+ queue_work(svc_rdma_wq, &rdma->sc_work);
}
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
atomic_read(&xprt->sc_sq_count) <
xprt->sc_sq_depth);
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return 0;
+ return -ENOTCONN;
continue;
}
/* Take a transport ref for each WR posted */
@@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err)
{
struct ib_send_wr err_wr;
- struct ib_sge sge;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
u32 *va;
@@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
/* XDR encode error */
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
+ ctxt = svc_rdma_get_context(xprt);
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->count = 1;
+ ctxt->pages[0] = p;
+
/* Prepare SGE for local address */
- sge.addr = ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(p), PAGE_SIZE, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+ ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
+ p, 0, length, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
put_page(p);
return;
}
atomic_inc(&xprt->sc_dma_used);
- sge.lkey = xprt->sc_dma_lkey;
- sge.length = length;
-
- ctxt = svc_rdma_get_context(xprt);
- ctxt->count = 1;
- ctxt->pages[0] = p;
+ ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].length = length;
/* Prepare SEND WR */
memset(&err_wr, 0, sizeof err_wr);
ctxt->wr_op = IB_WR_SEND;
err_wr.wr_id = (unsigned long)ctxt;
- err_wr.sg_list = &sge;
+ err_wr.sg_list = ctxt->sge;
err_wr.num_sge = 1;
err_wr.opcode = IB_WR_SEND;
err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
if (ret) {
dprintk("svcrdma: Error %d posting send for protocol error\n",
ret);
- ib_dma_unmap_single(xprt->sc_cm_id->device,
- sge.addr, PAGE_SIZE,
- DMA_FROM_DEVICE);
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
}
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
dprintk("RPC: %s: called\n", __func__);
- cancel_delayed_work(&r_xprt->rdma_connect);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&r_xprt->rdma_connect);
xprt_clear_connected(xprt);
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
xprt_rdma_free_addresses(xprt);
- kfree(xprt->slot);
- xprt->slot = NULL;
- kfree(xprt);
+ xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(-EBADF);
}
- xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+ xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+ xprt_rdma_slot_table_entries);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
return ERR_PTR(-ENOMEM);
}
- xprt->max_reqs = xprt_rdma_slot_table_entries;
- xprt->slot = kcalloc(xprt->max_reqs,
- sizeof(struct rpc_rqst), GFP_KERNEL);
- if (xprt->slot == NULL) {
- dprintk("RPC: %s: couldn't allocate %d slots\n",
- __func__, xprt->max_reqs);
- kfree(xprt);
- return ERR_PTR(-ENOMEM);
- }
-
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
return ERR_PTR(rc);
}
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
- flush_scheduled_work();
+ flush_delayed_work(&r_xprt->rdma_connect);
}
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..dfcab5ac65af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
xs_close(xprt);
xs_free_peer_addresses(xprt);
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
module_put(THIS_MODULE);
}
@@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
xs_update_peer_port(xprt);
}
-static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
+static unsigned short xs_get_srcport(struct sock_xprt *transport)
{
unsigned short port = transport->srcport;
@@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
return port;
}
-static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
+static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
{
if (transport->srcport != 0)
transport->srcport = 0;
@@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
return xprt_max_resvport;
return --port;
}
-
-static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind(struct sock_xprt *transport, struct socket *sock)
{
- struct sockaddr_in myaddr = {
- .sin_family = AF_INET,
- };
- struct sockaddr_in *sa;
+ struct sockaddr_storage myaddr;
int err, nloop = 0;
- unsigned short port = xs_get_srcport(transport, sock);
+ unsigned short port = xs_get_srcport(transport);
unsigned short last;
- sa = (struct sockaddr_in *)&transport->srcaddr;
- myaddr.sin_addr = sa->sin_addr;
+ memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
- myaddr.sin_port = htons(port);
- err = kernel_bind(sock, (struct sockaddr *) &myaddr,
- sizeof(myaddr));
+ rpc_set_port((struct sockaddr *)&myaddr, port);
+ err = kernel_bind(sock, (struct sockaddr *)&myaddr,
+ transport->xprt.addrlen);
if (port == 0)
break;
if (err == 0) {
@@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
break;
}
last = port;
- port = xs_next_srcport(transport, sock, port);
+ port = xs_next_srcport(transport, port);
if (port > last)
nloop++;
} while (err == -EADDRINUSE && nloop != 2);
- dprintk("RPC: %s %pI4:%u: %s (%d)\n",
- __func__, &myaddr.sin_addr,
- port, err ? "failed" : "ok", err);
- return err;
-}
-
-static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
-{
- struct sockaddr_in6 myaddr = {
- .sin6_family = AF_INET6,
- };
- struct sockaddr_in6 *sa;
- int err, nloop = 0;
- unsigned short port = xs_get_srcport(transport, sock);
- unsigned short last;
- sa = (struct sockaddr_in6 *)&transport->srcaddr;
- myaddr.sin6_addr = sa->sin6_addr;
- do {
- myaddr.sin6_port = htons(port);
- err = kernel_bind(sock, (struct sockaddr *) &myaddr,
- sizeof(myaddr));
- if (port == 0)
- break;
- if (err == 0) {
- transport->srcport = port;
- break;
- }
- last = port;
- port = xs_next_srcport(transport, sock, port);
- if (port > last)
- nloop++;
- } while (err == -EADDRINUSE && nloop != 2);
- dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
- &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
+ if (myaddr.ss_family == AF_INET)
+ dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
+ &((struct sockaddr_in *)&myaddr)->sin_addr,
+ port, err ? "failed" : "ok", err);
+ else
+ dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
+ &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
+ port, err ? "failed" : "ok", err);
return err;
}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key xs_key[2];
static struct lock_class_key xs_slock_key[2];
@@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock)
sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
}
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+ switch (family) {
+ case AF_INET:
+ xs_reclassify_socket4(sock);
+ break;
+ case AF_INET6:
+ xs_reclassify_socket6(sock);
+ break;
+ }
+}
#else
static inline void xs_reclassify_socket4(struct socket *sock)
{
@@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock)
static inline void xs_reclassify_socket6(struct socket *sock)
{
}
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+}
#endif
+static struct socket *xs_create_sock(struct rpc_xprt *xprt,
+ struct sock_xprt *transport, int family, int type, int protocol)
+{
+ struct socket *sock;
+ int err;
+
+ err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
+ if (err < 0) {
+ dprintk("RPC: can't create %d transport socket (%d).\n",
+ protocol, -err);
+ goto out;
+ }
+ xs_reclassify_socket(family, sock);
+
+ if (xs_bind(transport, sock)) {
+ sock_release(sock);
+ goto out;
+ }
+
+ return sock;
+out:
+ return ERR_PTR(err);
+}
+
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
xs_udp_do_set_buffer_size(xprt);
}
-/**
- * xs_udp_connect_worker4 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker4(struct work_struct *work)
+static void xs_udp_setup_socket(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
- int err, status = -EIO;
+ int status = -EIO;
if (xprt->shutdown)
goto out;
/* Start by resetting any existing state */
xs_reset_transport(transport);
-
- err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
- if (err < 0) {
- dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+ sock = xs_create_sock(xprt, transport,
+ xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+ if (IS_ERR(sock))
goto out;
- }
- xs_reclassify_socket4(sock);
-
- if (xs_bind4(transport, sock)) {
- sock_release(sock);
- goto out;
- }
-
- dprintk("RPC: worker connecting xprt %p via %s to "
- "%s (port %s)\n", xprt,
- xprt->address_strings[RPC_DISPLAY_PROTO],
- xprt->address_strings[RPC_DISPLAY_ADDR],
- xprt->address_strings[RPC_DISPLAY_PORT]);
-
- xs_udp_finish_connecting(xprt, sock);
- status = 0;
-out:
- xprt_clear_connecting(xprt);
- xprt_wake_pending_tasks(xprt, status);
-}
-
-/**
- * xs_udp_connect_worker6 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker6(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
- int err, status = -EIO;
-
- if (xprt->shutdown)
- goto out;
-
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
-
- err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
- if (err < 0) {
- dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
- goto out;
- }
- xs_reclassify_socket6(sock);
-
- if (xs_bind6(transport, sock) < 0) {
- sock_release(sock);
- goto out;
- }
dprintk("RPC: worker connecting xprt %p via %s to "
"%s (port %s)\n", xprt,
@@ -1755,12 +1705,12 @@ out:
* We need to preserve the port number so the reply cache on the server can
* find our cached RPC replies when we get around to reconnecting.
*/
-static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_abort_connection(struct sock_xprt *transport)
{
int result;
struct sockaddr any;
- dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
+ dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
/*
* Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
if (!result)
- xs_sock_mark_closed(xprt);
+ xs_sock_mark_closed(&transport->xprt);
else
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
}
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_tcp_reuse_connection(struct sock_xprt *transport)
{
unsigned int state = transport->inet->sk_state;
@@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
"sk_shutdown set to %d\n",
__func__, transport->inet->sk_shutdown);
}
- xs_abort_connection(xprt, transport);
+ xs_abort_connection(transport);
}
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
- struct sock_xprt *transport,
- struct socket *(*create_sock)(struct rpc_xprt *,
- struct sock_xprt *))
+static void xs_tcp_setup_socket(struct work_struct *work)
{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
struct socket *sock = transport->sock;
+ struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
if (xprt->shutdown)
@@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
- sock = create_sock(xprt, transport);
+ sock = xs_create_sock(xprt, transport,
+ xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
if (IS_ERR(sock)) {
status = PTR_ERR(sock);
goto out;
@@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
&xprt->state);
/* "close" the socket, preserving the local port */
- xs_tcp_reuse_connection(xprt, transport);
+ xs_tcp_reuse_connection(transport);
if (abort_and_exit)
goto out_eagain;
@@ -1925,84 +1876,6 @@ out:
xprt_wake_pending_tasks(xprt, status);
}
-static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
- struct sock_xprt *transport)
-{
- struct socket *sock;
- int err;
-
- /* start from scratch */
- err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (err < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n",
- -err);
- goto out_err;
- }
- xs_reclassify_socket4(sock);
-
- if (xs_bind4(transport, sock) < 0) {
- sock_release(sock);
- goto out_err;
- }
- return sock;
-out_err:
- return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker4(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
-
- xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
-}
-
-static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
- struct sock_xprt *transport)
-{
- struct socket *sock;
- int err;
-
- /* start from scratch */
- err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (err < 0) {
- dprintk("RPC: can't create TCP transport socket (%d).\n",
- -err);
- goto out_err;
- }
- xs_reclassify_socket6(sock);
-
- if (xs_bind6(transport, sock) < 0) {
- sock_release(sock);
- goto out_err;
- }
- return sock;
-out_err:
- return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker6(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, connect_worker.work);
- struct rpc_xprt *xprt = &transport->xprt;
-
- xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
-}
-
/**
* xs_connect - connect a socket to a remote endpoint
* @task: address of RPC task that manages state of connect request
@@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = {
.print_stats = xs_tcp_print_stats,
};
+static int xs_init_anyaddr(const int family, struct sockaddr *sap)
+{
+ static const struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+ static const struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ };
+
+ switch (family) {
+ case AF_INET:
+ memcpy(sap, &sin, sizeof(sin));
+ break;
+ case AF_INET6:
+ memcpy(sap, &sin6, sizeof(sin6));
+ break;
+ default:
+ dprintk("RPC: %s: Bad address family\n", __func__);
+ return -EAFNOSUPPORT;
+ }
+ return 0;
+}
+
static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
unsigned int slot_table_size)
{
@@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
return ERR_PTR(-EBADF);
}
- new = kzalloc(sizeof(*new), GFP_KERNEL);
- if (new == NULL) {
+ xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+ if (xprt == NULL) {
dprintk("RPC: xs_setup_xprt: couldn't allocate "
"rpc_xprt\n");
return ERR_PTR(-ENOMEM);
}
- xprt = &new->xprt;
-
- xprt->max_reqs = slot_table_size;
- xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
- if (xprt->slot == NULL) {
- kfree(xprt);
- dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
- "table\n");
- return ERR_PTR(-ENOMEM);
- }
+ new = container_of(xprt, struct sock_xprt, xprt);
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
xprt->addrlen = args->addrlen;
if (args->srcaddr)
memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
+ else {
+ int err;
+ err = xs_init_anyaddr(args->dstaddr->sa_family,
+ (struct sockaddr *)&new->srcaddr);
+ if (err != 0)
+ return ERR_PTR(err);
+ }
return xprt;
}
@@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt_set_bound(xprt);
INIT_DELAYED_WORK(&transport->connect_worker,
- xs_udp_connect_worker4);
+ xs_udp_setup_socket);
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
break;
case AF_INET6:
@@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt_set_bound(xprt);
INIT_DELAYED_WORK(&transport->connect_worker,
- xs_udp_connect_worker6);
+ xs_udp_setup_socket);
xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
break;
default:
@@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
return ret;
}
@@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt_set_bound(xprt);
INIT_DELAYED_WORK(&transport->connect_worker,
- xs_tcp_connect_worker4);
+ xs_tcp_setup_socket);
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
break;
case AF_INET6:
@@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt_set_bound(xprt);
INIT_DELAYED_WORK(&transport->connect_worker,
- xs_tcp_connect_worker6);
+ xs_tcp_setup_socket);
xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
break;
default:
@@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
return ret;
}
@@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
goto out_err;
}
- if (xprt_bound(xprt))
- dprintk("RPC: set up xprt to %s (port %s) via %s\n",
- xprt->address_strings[RPC_DISPLAY_ADDR],
- xprt->address_strings[RPC_DISPLAY_PORT],
- xprt->address_strings[RPC_DISPLAY_PROTO]);
- else
- dprintk("RPC: set up xprt to %s (autobind) via %s\n",
- xprt->address_strings[RPC_DISPLAY_ADDR],
- xprt->address_strings[RPC_DISPLAY_PROTO]);
+ dprintk("RPC: set up xprt to %s (port %s) via %s\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ xprt->address_strings[RPC_DISPLAY_PROTO]);
/*
* Since we don't want connections for the backchannel, we set
@@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
return xprt;
ret = ERR_PTR(-EINVAL);
out_err:
- kfree(xprt->slot);
- kfree(xprt);
+ xprt_free(xprt);
return ret;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 33217fc3d697..e9f0d5004483 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -396,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
struct tipc_sock *tsock = tipc_sk(sock->sk);
+ memset(addr, 0, sizeof(*addr));
if (peer) {
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0ebc777a6660..2268e6798124 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
-static atomic_t unix_nr_socks = ATOMIC_INIT(0);
+static atomic_long_t unix_nr_socks;
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
if (u->addr)
unix_release_addr(u->addr);
- atomic_dec(&unix_nr_socks);
+ atomic_long_dec(&unix_nr_socks);
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
- printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
- atomic_read(&unix_nr_socks));
+ printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
+ atomic_long_read(&unix_nr_socks));
#endif
}
@@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
struct sock *sk = NULL;
struct unix_sock *u;
- atomic_inc(&unix_nr_socks);
- if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
+ atomic_long_inc(&unix_nr_socks);
+ if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
@@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
unix_insert_socket(unix_sockets_unbound, sk);
out:
if (sk == NULL)
- atomic_dec(&unix_nr_socks);
+ atomic_long_dec(&unix_nr_socks);
else {
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+#define MAX_RECURSION_LEVEL 4
+
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+ unsigned char max_level = 0;
+ int unix_sock_count = 0;
+
+ for (i = scm->fp->count - 1; i >= 0; i--) {
+ struct sock *sk = unix_get_socket(scm->fp->fp[i]);
+
+ if (sk) {
+ unix_sock_count++;
+ max_level = max(max_level,
+ unix_sk(sk)->recursion_level);
+ }
+ }
+ if (unlikely(max_level > MAX_RECURSION_LEVEL))
+ return -ETOOMANYREFS;
/*
* Need to duplicate file references for the sake of garbage
@@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- return 0;
+ if (unix_sock_count) {
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->fp[i]);
+ }
+ return max_level;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sk_buff *skb;
long timeo;
struct scm_cookie tmp_scm;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
err = unix_scm_to_skb(siocb->scm, skb, true);
- if (err)
+ if (err < 0)
goto out_free;
+ max_level = err + 1;
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1514,6 +1534,8 @@ restart:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, len);
sock_put(other);
@@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
int sent = 0;
struct scm_cookie tmp_scm;
bool fds_sent = false;
+ int max_level;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
- if (err) {
+ if (err < 0) {
kfree_skb(skb);
goto out_err;
}
+ max_level = err + 1;
fds_sent = true;
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto pipe_err_free;
skb_queue_tail(&other->sk_receive_queue, skb);
+ if (max_level > unix_sk(other)->recursion_level)
+ unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other, size);
sent += size;
@@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
unix_state_lock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
+ unix_sk(sk)->recursion_level = 0;
if (copied >= target)
goto unlock;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c8df6fda0b1f..f89f83bf828e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
unsigned int unix_tot_inflight;
-static struct sock *unix_get_socket(struct file *filp)
+struct sock *unix_get_socket(struct file *filp)
{
struct sock *u_sock = NULL;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -259,9 +259,16 @@ static void inc_inflight_move_tail(struct unix_sock *u)
}
static bool gc_in_progress = false;
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
void wait_for_unix_gc(void)
{
+ /*
+ * If number of inflight sockets is insane,
+ * force a garbage collect right now.
+ */
+ if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754b..55187c8f6420 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
while (len > 0) {
switch (*p & X25_FAC_CLASS_MASK) {
case X25_FAC_CLASS_A:
+ if (len < 2)
+ return 0;
switch (*p) {
case X25_FAC_REVERSE:
if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 2;
break;
case X25_FAC_CLASS_B:
+ if (len < 3)
+ return 0;
switch (*p) {
case X25_FAC_PACKET_SIZE:
facilities->pacsize_in = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 3;
break;
case X25_FAC_CLASS_C:
+ if (len < 4)
+ return 0;
printk(KERN_DEBUG "X.25: unknown facility %02X, "
"values %02X, %02X, %02X\n",
p[0], p[1], p[2], p[3]);
@@ -132,26 +138,26 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
len -= 4;
break;
case X25_FAC_CLASS_D:
+ if (len < p[1] + 2)
+ return 0;
switch (*p) {
case X25_FAC_CALLING_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->calling_len = p[2];
memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLING_AE;
break;
case X25_FAC_CALLED_AE:
- if (p[1] > X25_MAX_DTE_FACIL_LEN)
- break;
+ if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
+ return 0;
dte_facs->called_len = p[2];
memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLED_AE;
break;
default:
printk(KERN_DEBUG "X.25: unknown facility %02X,"
- "length %d, values %02X, %02X, "
- "%02X, %02X\n",
- p[0], p[1], p[2], p[3], p[4], p[5]);
+ "length %d\n", p[0], p[1]);
break;
}
len -= p[1] + 2;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efac..f729f022be69 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
&x25->vc_facil_mask);
if (len > 0)
skb_pull(skb, len);
+ else
+ return -1;
/*
* Copy any Call User Data.
*/
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 73e7b954ad28..b25c6463c3e9 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -394,6 +394,7 @@ void __exit x25_link_free(void)
list_for_each_safe(entry, tmp, &x25_neigh_list) {
nb = list_entry(entry, struct x25_neigh, node);
__x25_remove_neigh(nb);
+ dev_put(nb->dev);
}
write_unlock_bh(&x25_neigh_list_lock);
}
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index a2023ec52329..1e98bc0fe0a5 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -19,7 +19,7 @@ struct hlist_head *xfrm_hash_alloc(unsigned int sz)
if (sz <= PAGE_SIZE)
n = kzalloc(sz, GFP_KERNEL);
else if (hashdist)
- n = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
+ n = vzalloc(sz);
else
n = (struct hlist_head *)
__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index eb96ce52f178..220ebc05c7af 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1268,7 +1268,7 @@ struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
return xc;
error:
- kfree(xc);
+ xfrm_state_put(xc);
return NULL;
}
EXPORT_SYMBOL(xfrm_state_migrate);