summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c236
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/anycast.c35
-rw-r--r--net/ipv6/datagram.c14
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/fib6_rules.c24
-rw-r--r--net/ipv6/ip6_flowlabel.c17
-rw-r--r--net/ipv6/ip6_tunnel.c38
-rw-r--r--net/ipv6/mcast.c51
-rw-r--r--net/ipv6/netfilter/ip6_queue.c2
-rw-r--r--net/ipv6/raw.c24
-rw-r--r--net/ipv6/reassembly.c17
-rw-r--r--net/ipv6/sit.c69
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c104
-rw-r--r--net/ipv6/udp.c223
-rw-r--r--net/ipv6/udplite.c1
18 files changed, 550 insertions, 336 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 918648409612..b1ce8fc62049 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
- rcu_read_lock();
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
if (changed)
dev_forward_change(idev);
}
- rcu_read_unlock();
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
@@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
hiscore->rule = -1;
hiscore->ifa = NULL;
- read_lock(&dev_base_lock);
rcu_read_lock();
- for_each_netdev(net, dev) {
+ for_each_netdev_rcu(net, dev) {
struct inet6_dev *idev;
/* Candidate Source Address (section 4)
@@ -1235,7 +1232,6 @@ try_nextdev:
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
- read_unlock(&dev_base_lock);
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
@@ -3485,85 +3481,114 @@ enum addr_type_t
ANYCAST_ADDR,
};
+/* called with rcu_read_lock() */
+static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
+ struct netlink_callback *cb, enum addr_type_t type,
+ int s_ip_idx, int *p_ip_idx)
+{
+ struct inet6_ifaddr *ifa;
+ struct ifmcaddr6 *ifmca;
+ struct ifacaddr6 *ifaca;
+ int err = 1;
+ int ip_idx = *p_ip_idx;
+
+ read_lock_bh(&idev->lock);
+ switch (type) {
+ case UNICAST_ADDR:
+ /* unicast address incl. temp addr */
+ for (ifa = idev->addr_list; ifa;
+ ifa = ifa->if_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifaddr(skb, ifa,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWADDR,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ case MULTICAST_ADDR:
+ /* multicast address */
+ for (ifmca = idev->mc_list; ifmca;
+ ifmca = ifmca->next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifmcaddr(skb, ifmca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETMULTICAST,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ case ANYCAST_ADDR:
+ /* anycast address */
+ for (ifaca = idev->ac_list; ifaca;
+ ifaca = ifaca->aca_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ err = inet6_fill_ifacaddr(skb, ifaca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_GETANYCAST,
+ NLM_F_MULTI);
+ if (err <= 0)
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ *p_ip_idx = ip_idx;
+ return err;
+}
+
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ struct net *net = sock_net(skb->sk);
+ int h, s_h;
int idx, ip_idx;
int s_idx, s_ip_idx;
- int err = 1;
struct net_device *dev;
- struct inet6_dev *idev = NULL;
- struct inet6_ifaddr *ifa;
- struct ifmcaddr6 *ifmca;
- struct ifacaddr6 *ifaca;
- struct net *net = sock_net(skb->sk);
-
- s_idx = cb->args[0];
- s_ip_idx = ip_idx = cb->args[1];
+ struct inet6_dev *idev;
+ struct hlist_head *head;
+ struct hlist_node *node;
- idx = 0;
- for_each_netdev(net, dev) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- s_ip_idx = 0;
- ip_idx = 0;
- if ((idev = in6_dev_get(dev)) == NULL)
- goto cont;
- read_lock_bh(&idev->lock);
- switch (type) {
- case UNICAST_ADDR:
- /* unicast address incl. temp addr */
- for (ifa = idev->addr_list; ifa;
- ifa = ifa->if_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
- continue;
- err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_NEWADDR,
- NLM_F_MULTI);
- }
- break;
- case MULTICAST_ADDR:
- /* multicast address */
- for (ifmca = idev->mc_list; ifmca;
- ifmca = ifmca->next, ip_idx++) {
- if (ip_idx < s_ip_idx)
- continue;
- err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_GETMULTICAST,
- NLM_F_MULTI);
- }
- break;
- case ANYCAST_ADDR:
- /* anycast address */
- for (ifaca = idev->ac_list; ifaca;
- ifaca = ifaca->aca_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
- continue;
- err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- RTM_GETANYCAST,
- NLM_F_MULTI);
- }
- break;
- default:
- break;
- }
- read_unlock_bh(&idev->lock);
- in6_dev_put(idev);
+ s_h = cb->args[0];
+ s_idx = idx = cb->args[1];
+ s_ip_idx = ip_idx = cb->args[2];
- if (err <= 0)
- break;
+ rcu_read_lock();
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ s_ip_idx = 0;
+ ip_idx = 0;
+ if ((idev = __in6_dev_get(dev)) == NULL)
+ goto cont;
+
+ if (in6_dump_addrs(idev, skb, cb, type,
+ s_ip_idx, &ip_idx) <= 0)
+ goto done;
cont:
- idx++;
+ idx++;
+ }
}
- cb->args[0] = idx;
- cb->args[1] = ip_idx;
+done:
+ rcu_read_unlock();
+ cb->args[0] = h;
+ cb->args[1] = idx;
+ cb->args[2] = ip_idx;
+
return skb->len;
}
@@ -3827,28 +3852,39 @@ nla_put_failure:
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int idx, err;
- int s_idx = cb->args[0];
+ int h, s_h;
+ int idx = 0, s_idx;
struct net_device *dev;
struct inet6_dev *idev;
+ struct hlist_head *head;
+ struct hlist_node *node;
- read_lock(&dev_base_lock);
- idx = 0;
- for_each_netdev(net, dev) {
- if (idx < s_idx)
- goto cont;
- if ((idev = in6_dev_get(dev)) == NULL)
- goto cont;
- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
- in6_dev_put(idev);
- if (err <= 0)
- break;
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ rcu_read_lock();
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ goto cont;
+ if (inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI) <= 0)
+ goto out;
cont:
- idx++;
+ idx++;
+ }
}
- read_unlock(&dev_base_lock);
- cb->args[0] = idx;
+out:
+ rcu_read_unlock();
+ cb->args[1] = idx;
+ cb->args[0] = h;
return skb->len;
}
@@ -4052,9 +4088,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev) {
- rcu_read_lock();
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
@@ -4062,9 +4097,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
if (changed)
dev_disable_change(idev);
}
- rcu_read_unlock();
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
@@ -4464,7 +4498,7 @@ static int addrconf_init_net(struct net *net)
all = &ipv6_devconf;
dflt = &ipv6_devconf_dflt;
- if (net != &init_net) {
+ if (!net_eq(net, &init_net)) {
all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
if (all == NULL)
goto err_alloc_all;
@@ -4512,7 +4546,7 @@ static void addrconf_exit_net(struct net *net)
__addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
__addrconf_sysctl_unregister(net->ipv6.devconf_all);
#endif
- if (net != &init_net) {
+ if (!net_eq(net, &init_net)) {
kfree(net->ipv6.devconf_dflt);
kfree(net->ipv6.devconf_all);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b6d058818673..12e69d364dd5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
-static int inet6_create(struct net *net, struct socket *sock, int protocol)
+static int inet6_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
{
struct inet_sock *inet;
struct ipv6_pinfo *np;
@@ -158,7 +159,7 @@ lookup_protocol:
}
err = -EPERM;
- if (answer->capability > 0 && !capable(answer->capability))
+ if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW))
goto out_rcu_unlock;
sock->ops = answer->ops;
@@ -314,6 +315,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
+ rcu_read_lock();
if (addr_type & IPV6_ADDR_LINKLOCAL) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
addr->sin6_scope_id) {
@@ -326,12 +328,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
err = -EINVAL;
- goto out;
+ goto out_unlock;
}
- dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
if (!dev) {
err = -ENODEV;
- goto out;
+ goto out_unlock;
}
}
@@ -342,14 +344,11 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
if (!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
- if (dev)
- dev_put(dev);
err = -EADDRNOTAVAIL;
- goto out;
+ goto out_unlock;
}
}
- if (dev)
- dev_put(dev);
+ rcu_read_unlock();
}
}
@@ -381,6 +380,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
out:
release_sock(sk);
return err;
+out_unlock:
+ rcu_read_unlock();
+ goto out;
}
EXPORT_SYMBOL(inet6_bind);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 0f526f8ea518..c2f300c314be 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -667,7 +667,7 @@ static int ah6_init_state(struct xfrm_state *x)
}
ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
- ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+ ahp->icv_trunc_len = x->aalg->alg_trunc_len/8;
BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1ae58bec1de0..f1c74c8ef9de 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
if (dev)
return ipv6_chk_acast_dev(dev, addr);
- read_lock(&dev_base_lock);
- for_each_netdev(net, dev)
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
if (ipv6_chk_acast_dev(dev, addr)) {
found = 1;
break;
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
return found;
}
@@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
state->idev = NULL;
- for_each_netdev(net, state->dev) {
+ for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
+ idev = __in6_dev_get(state->dev);
if (!idev)
continue;
read_lock_bh(&idev->lock);
@@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
break;
}
read_unlock_bh(&idev->lock);
- in6_dev_put(idev);
}
return im;
}
@@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im
im = im->aca_next;
while (!im) {
- if (likely(state->idev != NULL)) {
+ if (likely(state->idev != NULL))
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
- }
- state->dev = next_net_device(state->dev);
+
+ state->dev = next_net_device_rcu(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
}
- state->idev = in6_dev_get(state->dev);
+ state->idev = __in6_dev_get(state->dev);
if (!state->idev)
continue;
read_lock_bh(&state->idev->lock);
@@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
}
static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(dev_base_lock)
+ __acquires(RCU)
{
- read_lock(&dev_base_lock);
+ rcu_read_lock();
return ac6_get_idx(seq, *pos);
}
static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct ifacaddr6 *im;
- im = ac6_get_next(seq, v);
+ struct ifacaddr6 *im = ac6_get_next(seq, v);
+
++*pos;
return im;
}
static void ac6_seq_stop(struct seq_file *seq, void *v)
- __releases(dev_base_lock)
+ __releases(RCU)
{
struct ac6_iter_state *state = ac6_seq_private(seq);
+
if (likely(state->idev != NULL)) {
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
+ state->idev = NULL;
}
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int ac6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9f70452a69e7..e6f9cdf780fe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -537,12 +537,17 @@ int datagram_send_ctl(struct net *net,
addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
+ rcu_read_lock();
if (fl->oif) {
- dev = dev_get_by_index(net, fl->oif);
- if (!dev)
+ dev = dev_get_by_index_rcu(net, fl->oif);
+ if (!dev) {
+ rcu_read_unlock();
return -ENODEV;
- } else if (addr_type & IPV6_ADDR_LINKLOCAL)
+ }
+ } else if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ rcu_read_unlock();
return -EINVAL;
+ }
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
@@ -553,8 +558,7 @@ int datagram_send_ctl(struct net *net,
ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
}
- if (dev)
- dev_put(dev);
+ rcu_read_unlock();
if (err)
goto exit_f;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index af597c73ebe9..668a46b655e6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -473,7 +473,7 @@ static int esp_init_authenc(struct xfrm_state *x)
}
err = crypto_aead_setauthsize(
- aead, aalg_desc->uinfo.auth.icv_truncbits / 8);
+ aead, x->aalg->alg_trunc_len / 8);
if (err)
goto free_key;
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 00a7a5e4ac97..b7aa7c64cc4a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -264,44 +264,36 @@ static struct fib_rules_ops fib6_rules_ops_template = {
static int fib6_rules_net_init(struct net *net)
{
+ struct fib_rules_ops *ops;
int err = -ENOMEM;
- net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template,
- sizeof(*net->ipv6.fib6_rules_ops),
- GFP_KERNEL);
- if (!net->ipv6.fib6_rules_ops)
- goto out;
+ ops = fib_rules_register(&fib6_rules_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.fib6_rules_ops = ops;
- net->ipv6.fib6_rules_ops->fro_net = net;
- INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list);
err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0,
- RT6_TABLE_LOCAL, FIB_RULE_PERMANENT);
+ RT6_TABLE_LOCAL, 0);
if (err)
goto out_fib6_rules_ops;
err = fib_default_rule_add(net->ipv6.fib6_rules_ops,
0x7FFE, RT6_TABLE_MAIN, 0);
if (err)
- goto out_fib6_default_rule_add;
+ goto out_fib6_rules_ops;
- err = fib_rules_register(net->ipv6.fib6_rules_ops);
- if (err)
- goto out_fib6_default_rule_add;
out:
return err;
-out_fib6_default_rule_add:
- fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops);
out_fib6_rules_ops:
- kfree(net->ipv6.fib6_rules_ops);
+ fib_rules_unregister(ops);
goto out;
}
static void fib6_rules_net_exit(struct net *net)
{
fib_rules_unregister(net->ipv6.fib6_rules_ops);
- kfree(net->ipv6.fib6_rules_ops);
}
static struct pernet_operations fib6_rules_net_ops = {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7712578bdc66..6e7bffa2205e 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -67,7 +67,7 @@ static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
struct ip6_flowlabel *fl;
for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) {
- if (fl->label == label && fl->fl_net == net)
+ if (fl->label == label && net_eq(fl->fl_net, net))
return fl;
}
return NULL;
@@ -163,7 +163,8 @@ static void ip6_fl_purge(struct net *net)
struct ip6_flowlabel *fl, **flp;
flp = &fl_ht[i];
while ((fl = *flp) != NULL) {
- if (fl->fl_net == net && atomic_read(&fl->users) == 0) {
+ if (net_eq(fl->fl_net, net) &&
+ atomic_read(&fl->users) == 0) {
*flp = fl->next;
fl_free(fl);
atomic_dec(&fl_size);
@@ -377,8 +378,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
goto done;
fl->share = freq->flr_share;
addr_type = ipv6_addr_type(&freq->flr_dst);
- if ((addr_type&IPV6_ADDR_MAPPED)
- || addr_type == IPV6_ADDR_ANY) {
+ if ((addr_type & IPV6_ADDR_MAPPED) ||
+ addr_type == IPV6_ADDR_ANY) {
err = -EINVAL;
goto done;
}
@@ -421,8 +422,8 @@ static int mem_check(struct sock *sk)
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
- (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4)
- && !capable(CAP_NET_ADMIN)))
+ (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+ !capable(CAP_NET_ADMIN)))
return -ENOBUFS;
return 0;
@@ -630,7 +631,7 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
fl = fl_ht[state->bucket];
- while (fl && fl->fl_net != net)
+ while (fl && !net_eq(fl->fl_net, net))
fl = fl->next;
if (fl)
break;
@@ -645,7 +646,7 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo
fl = fl->next;
try_again:
- while (fl && fl->fl_net != net)
+ while (fl && !net_eq(fl->fl_net, net))
fl = fl->next;
while (!fl) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6c1b5c98e818..d453d07b0dfe 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -78,7 +78,7 @@ static void ip6_fb_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
-static int ip6_tnl_net_id;
+static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
@@ -658,6 +658,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
+/* called with rcu_read_lock() */
static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
struct ip6_tnl_parm *p = &t->parms;
@@ -668,15 +669,13 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
struct net_device *ldev = NULL;
if (p->link)
- ldev = dev_get_by_index(net, p->link);
+ ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(&p->laddr) ||
likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
ret = 1;
- if (ldev)
- dev_put(ldev);
}
return ret;
}
@@ -804,8 +803,9 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
+ rcu_read_lock();
if (p->link)
- ldev = dev_get_by_index(net, p->link);
+ ldev = dev_get_by_index_rcu(net, p->link);
if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
printk(KERN_WARNING
@@ -819,8 +819,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
p->name);
else
ret = 1;
- if (ldev)
- dev_put(ldev);
+ rcu_read_unlock();
}
return ret;
}
@@ -1410,17 +1409,8 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
static int ip6_tnl_init_net(struct net *net)
{
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
- struct ip6_tnl_net *ip6n;
-
- err = -ENOMEM;
- ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL);
- if (ip6n == NULL)
- goto err_alloc;
-
- err = net_assign_generic(net, ip6_tnl_net_id, ip6n);
- if (err < 0)
- goto err_assign;
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
@@ -1443,27 +1433,23 @@ static int ip6_tnl_init_net(struct net *net)
err_register:
free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
- /* nothing */
-err_assign:
- kfree(ip6n);
-err_alloc:
return err;
}
static void ip6_tnl_exit_net(struct net *net)
{
- struct ip6_tnl_net *ip6n;
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
- ip6n = net_generic(net, ip6_tnl_net_id);
rtnl_lock();
ip6_tnl_destroy_tunnels(ip6n);
rtnl_unlock();
- kfree(ip6n);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
.exit = ip6_tnl_exit_net,
+ .id = &ip6_tnl_net_id,
+ .size = sizeof(struct ip6_tnl_net),
};
/**
@@ -1488,7 +1474,7 @@ static int __init ip6_tunnel_init(void)
goto unreg_ip4ip6;
}
- err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops);
+ err = register_pernet_device(&ip6_tnl_net_ops);
if (err < 0)
goto err_pernet;
return 0;
@@ -1512,7 +1498,7 @@ static void __exit ip6_tunnel_cleanup(void)
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
- unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops);
+ unregister_pernet_device(&ip6_tnl_net_ops);
}
module_init(ip6_tunnel_init);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f9fcf690bd5d..1f9c44442e65 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
state->idev = NULL;
- for_each_netdev(net, state->dev) {
+ for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
+ idev = __in6_dev_get(state->dev);
if (!idev)
continue;
read_lock_bh(&idev->lock);
@@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
break;
}
read_unlock_bh(&idev->lock);
- in6_dev_put(idev);
}
return im;
}
@@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
im = im->next;
while (!im) {
- if (likely(state->idev != NULL)) {
+ if (likely(state->idev != NULL))
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
- }
- state->dev = next_net_device(state->dev);
+
+ state->dev = next_net_device_rcu(state->dev);
if (!state->dev) {
state->idev = NULL;
break;
}
- state->idev = in6_dev_get(state->dev);
+ state->idev = __in6_dev_get(state->dev);
if (!state->idev)
continue;
read_lock_bh(&state->idev->lock);
@@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
}
static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(dev_base_lock)
+ __acquires(RCU)
{
- read_lock(&dev_base_lock);
+ rcu_read_lock();
return igmp6_mc_get_idx(seq, *pos);
}
static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct ifmcaddr6 *im;
- im = igmp6_mc_get_next(seq, v);
+ struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v);
+
++*pos;
return im;
}
static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
- __releases(dev_base_lock)
+ __releases(RCU)
{
struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
if (likely(state->idev != NULL)) {
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
state->idev = NULL;
}
state->dev = NULL;
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
@@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
state->idev = NULL;
state->im = NULL;
- for_each_netdev(net, state->dev) {
+ for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
+ idev = __in6_dev_get(state->dev);
if (unlikely(idev == NULL))
continue;
read_lock_bh(&idev->lock);
@@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
spin_unlock_bh(&im->mca_lock);
}
read_unlock_bh(&idev->lock);
- in6_dev_put(idev);
}
return psf;
}
@@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
spin_unlock_bh(&state->im->mca_lock);
state->im = state->im->next;
while (!state->im) {
- if (likely(state->idev != NULL)) {
+ if (likely(state->idev != NULL))
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
- }
- state->dev = next_net_device(state->dev);
+
+ state->dev = next_net_device_rcu(state->dev);
if (!state->dev) {
state->idev = NULL;
goto out;
}
- state->idev = in6_dev_get(state->dev);
+ state->idev = __in6_dev_get(state->dev);
if (!state->idev)
continue;
read_lock_bh(&state->idev->lock);
@@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
}
static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(dev_base_lock)
+ __acquires(RCU)
{
- read_lock(&dev_base_lock);
+ rcu_read_lock();
return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
@@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
- __releases(dev_base_lock)
+ __releases(RCU)
{
struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
if (likely(state->im != NULL)) {
@@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
}
if (likely(state->idev != NULL)) {
read_unlock_bh(&state->idev->lock);
- in6_dev_put(state->idev);
state->idev = NULL;
}
state->dev = NULL;
- read_unlock(&dev_base_lock);
+ rcu_read_unlock();
}
static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 47a3623e7119..db4d5725cce8 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -501,7 +501,7 @@ ipq_rcv_nl_event(struct notifier_block *this,
if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
write_lock_bh(&queue_lock);
- if ((n->net == &init_net) && (n->pid == peer_pid))
+ if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
__ipq_reset();
write_unlock_bh(&queue_lock);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cb834ab7f071..926ce8eeffaf 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -249,7 +249,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Raw sockets are IPv6 only */
if (addr_type == IPV6_ADDR_MAPPED)
- return(-EADDRNOTAVAIL);
+ return -EADDRNOTAVAIL;
lock_sock(sk);
@@ -257,6 +257,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (sk->sk_state != TCP_CLOSE)
goto out;
+ rcu_read_lock();
/* Check if the address belongs to the host. */
if (addr_type != IPV6_ADDR_ANY) {
struct net_device *dev = NULL;
@@ -272,13 +273,13 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* Binding to link-local address requires an interface */
if (!sk->sk_bound_dev_if)
- goto out;
+ goto out_unlock;
- dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
- if (!dev) {
- err = -ENODEV;
- goto out;
- }
+ err = -ENODEV;
+ dev = dev_get_by_index_rcu(sock_net(sk),
+ sk->sk_bound_dev_if);
+ if (!dev)
+ goto out_unlock;
}
/* ipv4 addr of the socket is invalid. Only the
@@ -289,13 +290,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
- if (dev)
- dev_put(dev);
- goto out;
+ goto out_unlock;
}
}
- if (dev)
- dev_put(dev);
}
inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
@@ -303,6 +300,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
err = 0;
+out_unlock:
+ rcu_read_unlock();
out:
release_sock(sk);
return err;
@@ -1336,7 +1335,6 @@ static struct inet_protosw rawv6_protosw = {
.protocol = IPPROTO_IP, /* wild card */
.prot = &rawv6_prot,
.ops = &inet6_sockraw_ops,
- .capability = CAP_NET_RAW,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da5bd0ed83df..45efc39753e2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -208,18 +208,17 @@ static void ip6_frag_expire(unsigned long data)
fq_kill(fq);
net = container_of(fq->q.net, struct net, ipv6.frags);
- dev = dev_get_by_index(net, fq->iif);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out;
+ goto out_rcu_unlock;
- rcu_read_lock();
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
- rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out;
+ goto out_rcu_unlock;
/*
But use as source device on which LAST ARRIVED
@@ -228,9 +227,9 @@ static void ip6_frag_expire(unsigned long data)
*/
fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+out_rcu_unlock:
+ rcu_read_unlock();
out:
- if (dev)
- dev_put(dev);
spin_unlock(&fq->q.lock);
fq_put(fq);
}
@@ -682,7 +681,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net)
struct ctl_table_header *hdr;
table = ip6_frags_ns_ctl_table;
- if (net != &init_net) {
+ if (!net_eq(net, &init_net)) {
table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
@@ -700,7 +699,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net)
return 0;
err_reg:
- if (net != &init_net)
+ if (!net_eq(net, &init_net))
kfree(table);
err_alloc:
return -ENOMEM;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2362a3397e91..976e68244b99 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -66,7 +66,7 @@ static void ipip6_fb_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_init(struct net_device *dev);
static void ipip6_tunnel_setup(struct net_device *dev);
-static int sit_net_id;
+static int sit_net_id __read_mostly;
struct sit_net {
struct ip_tunnel *tunnels_r_l[HASH_SIZE];
struct ip_tunnel *tunnels_r[HASH_SIZE];
@@ -637,6 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
struct iphdr *tiph = &tunnel->parms.iph;
struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
+ __be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
struct iphdr *iph; /* Our new IP header */
@@ -726,25 +727,28 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
- if (tiph->frag_off)
+ if (df) {
mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
- else
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (mtu < 68) {
- stats->collisions++;
- ip_rt_put(rt);
- goto tx_error;
- }
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
- if (tunnel->parms.iph.daddr && skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+ if (mtu < 68) {
+ stats->collisions++;
+ ip_rt_put(rt);
+ goto tx_error;
+ }
- if (skb->len > mtu) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
- ip_rt_put(rt);
- goto tx_error;
+ if (mtu < IPV6_MIN_MTU) {
+ mtu = IPV6_MIN_MTU;
+ df = 0;
+ }
+
+ if (tunnel->parms.iph.daddr && skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+
+ if (skb->len > mtu) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ ip_rt_put(rt);
+ goto tx_error;
+ }
}
if (tunnel->err_count > 0) {
@@ -792,11 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
- if (mtu > IPV6_MIN_MTU)
- iph->frag_off = tiph->frag_off;
- else
- iph->frag_off = 0;
-
+ iph->frag_off = df;
iph->protocol = IPPROTO_IPV6;
iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
iph->daddr = rt->rt_dst;
@@ -1164,17 +1164,8 @@ static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head)
static int sit_init_net(struct net *net)
{
+ struct sit_net *sitn = net_generic(net, sit_net_id);
int err;
- struct sit_net *sitn;
-
- err = -ENOMEM;
- sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
- if (sitn == NULL)
- goto err_alloc;
-
- err = net_assign_generic(net, sit_net_id, sitn);
- if (err < 0)
- goto err_assign;
sitn->tunnels[0] = sitn->tunnels_wc;
sitn->tunnels[1] = sitn->tunnels_l;
@@ -1201,37 +1192,33 @@ err_reg_dev:
dev_put(sitn->fb_tunnel_dev);
free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
- /* nothing */
-err_assign:
- kfree(sitn);
-err_alloc:
return err;
}
static void sit_exit_net(struct net *net)
{
- struct sit_net *sitn;
+ struct sit_net *sitn = net_generic(net, sit_net_id);
LIST_HEAD(list);
- sitn = net_generic(net, sit_net_id);
rtnl_lock();
sit_destroy_tunnels(sitn, &list);
unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
- kfree(sitn);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
.exit = sit_exit_net,
+ .id = &sit_net_id,
+ .size = sizeof(struct sit_net),
};
static void __exit sit_cleanup(void)
{
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
- unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
+ unregister_pernet_device(&sit_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
@@ -1246,7 +1233,7 @@ static int __init sit_init(void)
return -EAGAIN;
}
- err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
+ err = register_pernet_device(&sit_net_ops);
if (err < 0)
xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 612fc53e0bb9..5b9af508b8f2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_options_received tcp_opt;
+ u8 *hash_location;
struct inet_request_sock *ireq;
struct inet6_request_sock *ireq6;
struct tcp_request_sock *treq;
@@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
- struct tcp_options_received tcp_opt;
if (!sysctl_tcp_syncookies || !th->ack)
goto out;
@@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, 0, dst);
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
if (tcp_opt.saw_tstamp)
cookie_check_timestamp(&tcp_opt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 34925f089e07..aadd7cef73b3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -461,7 +461,8 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
+ struct request_values *rvp)
{
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req)
if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
goto done;
- skb = tcp_make_synack(sk, dst, req);
+ skb = tcp_make_synack(sk, dst, req, rvp);
if (skb) {
struct tcphdr *th = tcp_hdr(skb);
@@ -1161,13 +1162,15 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_extend_values tmp_ext;
+ struct tcp_options_received tmp_opt;
+ u8 *hash_location;
+ struct request_sock *req;
struct inet6_request_sock *treq;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock *req = NULL;
- __u32 isn = TCP_SKB_CB(skb)->when;
struct dst_entry *dst = __sk_dst_get(sk);
+ __u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
#else
@@ -1205,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+
+ if (tmp_opt.cookie_plus > 0 &&
+ tmp_opt.saw_tstamp &&
+ !tp->rx_opt.cookie_out_never &&
+ (sysctl_tcp_cookie_size > 0 ||
+ (tp->cookie_values != NULL &&
+ tp->cookie_values->cookie_desired > 0))) {
+ u8 *c;
+ u32 *d;
+ u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
+ int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
+
+ if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
+ goto drop_and_free;
+
+ /* Secret recipe starts with IP addresses */
+ d = &ipv6_hdr(skb)->daddr.s6_addr32[0];
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ d = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+ *mess++ ^= *d++;
+
+ /* plus variable length Initiator Cookie */
+ c = (u8 *)mess;
+ while (l-- > 0)
+ *c++ ^= *hash_location++;
- tcp_parse_options(skb, &tmp_opt, 0, dst);
+#ifdef CONFIG_SYN_COOKIES
+ want_cookie = 0; /* not our kind of cookie */
+#endif
+ tmp_ext.cookie_out_never = 0; /* false */
+ tmp_ext.cookie_plus = tmp_opt.cookie_plus;
+ } else if (!tp->rx_opt.cookie_in_always) {
+ /* redundant indications, but ensure initialization. */
+ tmp_ext.cookie_out_never = 1; /* true */
+ tmp_ext.cookie_plus = 0;
+ } else {
+ goto drop_and_free;
+ }
+ tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@@ -1239,23 +1286,21 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v6_init_sequence(skb);
}
-
tcp_rsk(req)->snt_isn = isn;
security_inet_conn_request(sk, skb, req);
- if (tcp_v6_send_synack(sk, req))
- goto drop;
+ if (tcp_v6_send_synack(sk, req,
+ (struct request_values *)&tmp_ext) ||
+ want_cookie)
+ goto drop_and_free;
- if (!want_cookie) {
- inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
- return 0;
- }
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ return 0;
+drop_and_free:
+ reqsk_free(req);
drop:
- if (req)
- reqsk_free(req);
-
return 0; /* don't send reset */
}
@@ -1851,7 +1896,7 @@ static int tcp_v6_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache = TCP_MSS_DEFAULT;
tp->reordering = sysctl_tcp_reordering;
@@ -1867,6 +1912,19 @@ static int tcp_v6_init_sock(struct sock *sk)
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
+ /* TCP Cookie Transactions */
+ if (sysctl_tcp_cookie_size > 0) {
+ /* Default, cookies without s_data_payload. */
+ tp->cookie_values =
+ kzalloc(sizeof(*tp->cookie_values),
+ sk->sk_allocation);
+ if (tp->cookie_values != NULL)
+ kref_init(&tp->cookie_values->kref);
+ }
+ /* Presumed zeroed, in order of appearance:
+ * cookie_in_always, cookie_out_never,
+ * s_data_constant, s_data_in, s_data_out
+ */
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -2112,7 +2170,6 @@ static struct inet_protosw tcpv6_protosw = {
.protocol = IPPROTO_TCP,
.prot = &tcpv6_prot,
.ops = &inet6_stream_ops,
- .capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
@@ -2127,12 +2184,17 @@ static int tcpv6_net_init(struct net *net)
static void tcpv6_net_exit(struct net *net)
{
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
- inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
+}
+
+static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
}
static struct pernet_operations tcpv6_net_ops = {
- .init = tcpv6_net_init,
- .exit = tcpv6_net_exit,
+ .init = tcpv6_net_init,
+ .exit = tcpv6_net_exit,
+ .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d3b59d73f507..69ebdbe78c47 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -81,9 +81,33 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
return 0;
}
+static unsigned int udp6_portaddr_hash(struct net *net,
+ const struct in6_addr *addr6,
+ unsigned int port)
+{
+ unsigned int hash, mix = net_hash_mix(net);
+
+ if (ipv6_addr_any(addr6))
+ hash = jhash_1word(0, mix);
+ else if (ipv6_addr_v4mapped(addr6))
+ hash = jhash_1word(addr6->s6_addr32[3], mix);
+ else
+ hash = jhash2(addr6->s6_addr32, 4, mix);
+
+ return hash ^ port;
+}
+
+
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
- return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
+ unsigned int hash2_nulladdr =
+ udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+ unsigned int hash2_partial =
+ udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
+
+ /* precompute partial secondary hash */
+ udp_sk(sk)->udp_portaddr_hash = hash2_partial;
+ return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
}
static inline int compute_score(struct sock *sk, struct net *net,
@@ -94,7 +118,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
{
int score = -1;
- if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -124,6 +148,86 @@ static inline int compute_score(struct sock *sk, struct net *net,
return score;
}
+#define SCORE2_MAX (1 + 1 + 1)
+static inline int compute_score2(struct sock *sk, struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned short hnum,
+ int dif)
+{
+ int score = -1;
+
+ if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
+ sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
+ return -1;
+ score = 0;
+ if (inet->inet_dport) {
+ if (inet->inet_dport != sport)
+ return -1;
+ score++;
+ }
+ if (!ipv6_addr_any(&np->daddr)) {
+ if (!ipv6_addr_equal(&np->daddr, saddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
+
+/* called with read_rcu_lock() */
+static struct sock *udp6_lib_lookup2(struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, unsigned int hnum, int dif,
+ struct udp_hslot *hslot2, unsigned int slot2)
+{
+ struct sock *sk, *result;
+ struct hlist_nulls_node *node;
+ int score, badness;
+
+begin:
+ result = NULL;
+ badness = -1;
+ udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ score = compute_score2(sk, net, saddr, sport,
+ daddr, hnum, dif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ if (score == SCORE2_MAX)
+ goto exact_match;
+ }
+ }
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot2)
+ goto begin;
+
+ if (result) {
+exact_match:
+ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ result = NULL;
+ else if (unlikely(compute_score2(result, net, saddr, sport,
+ daddr, hnum, dif) < badness)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ return result;
+}
+
static struct sock *__udp6_lib_lookup(struct net *net,
struct in6_addr *saddr, __be16 sport,
struct in6_addr *daddr, __be16 dport,
@@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net,
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
- unsigned int hash = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot = &udptable->hash[hash];
+ unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness;
rcu_read_lock();
+ if (hslot->count > 10) {
+ hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count)
+ goto begin;
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif,
+ hslot2, slot2);
+ if (!result) {
+ hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+ if (hslot->count < hslot2->count)
+ goto begin;
+
+ result = udp6_lib_lookup2(net, &in6addr_any, sport,
+ daddr, hnum, dif,
+ hslot2, slot2);
+ }
+ rcu_read_unlock();
+ return result;
+ }
begin:
result = NULL;
badness = -1;
@@ -152,7 +280,7 @@ begin:
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
- if (get_nulls_value(node) != hash)
+ if (get_nulls_value(node) != slot)
goto begin;
if (result) {
@@ -288,9 +416,7 @@ try_again:
err = ulen;
out_free:
- lock_sock(sk);
- skb_free_datagram(sk, skb);
- release_sock(sk);
+ skb_free_datagram_locked(sk, skb);
out:
return err;
@@ -417,7 +543,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
if (!net_eq(sock_net(s), net))
continue;
- if (s->sk_hash == num && s->sk_family == PF_INET6) {
+ if (udp_sk(s)->udp_port_hash == num &&
+ s->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(s);
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
@@ -442,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
return NULL;
}
+static void flush_stack(struct sock **stack, unsigned int count,
+ struct sk_buff *skb, unsigned int final)
+{
+ unsigned int i;
+ struct sock *sk;
+ struct sk_buff *skb1;
+
+ for (i = 0; i < count; i++) {
+ skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+ sk = stack[i];
+ if (skb1) {
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk))
+ udpv6_queue_rcv_skb(sk, skb1);
+ else
+ sk_add_backlog(sk, skb1);
+ bh_unlock_sock(sk);
+ } else {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, IS_UDPLITE(sk));
+ }
+ }
+}
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
@@ -450,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct in6_addr *saddr, struct in6_addr *daddr,
struct udp_table *udptable)
{
- struct sock *sk, *sk2;
+ struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif;
+ unsigned int i, count = 0;
spin_lock(&hslot->lock);
sk = sk_nulls_head(&hslot->head);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
- if (!sk) {
- kfree_skb(skb);
- goto out;
- }
-
- sk2 = sk;
- while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr,
- uh->source, saddr, dif))) {
- struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
- if (buff) {
- bh_lock_sock(sk2);
- if (!sock_owned_by_user(sk2))
- udpv6_queue_rcv_skb(sk2, buff);
- else
- sk_add_backlog(sk2, buff);
- bh_unlock_sock(sk2);
+ while (sk) {
+ stack[count++] = sk;
+ sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
+ uh->source, saddr, dif);
+ if (unlikely(count == ARRAY_SIZE(stack))) {
+ if (!sk)
+ break;
+ flush_stack(stack, count, skb, ~0);
+ count = 0;
}
}
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- udpv6_queue_rcv_skb(sk, skb);
- else
- sk_add_backlog(sk, skb);
- bh_unlock_sock(sk);
-out:
+ /*
+ * before releasing the lock, we must take reference on sockets
+ */
+ for (i = 0; i < count; i++)
+ sock_hold(stack[i]);
+
spin_unlock(&hslot->lock);
+
+ if (count) {
+ flush_stack(stack, count, skb, count - 1);
+
+ for (i = 0; i < count; i++)
+ sock_put(stack[i]);
+ } else {
+ kfree_skb(skb);
+ }
return 0;
}
@@ -1286,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = {
.protocol = IPPROTO_UDP,
.prot = &udpv6_prot,
.ops = &inet6_dgram_ops,
- .capability =-1,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_PERMANENT,
};
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d737a27ee010..6ea6938919e6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = {
.protocol = IPPROTO_UDPLITE,
.prot = &udplitev6_prot,
.ops = &inet6_dgram_ops,
- .capability = -1,
.no_check = 0,
.flags = INET_PROTOSW_PERMANENT,
};