diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/addrconf.c | 236 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 22 | ||||
-rw-r--r-- | net/ipv6/ah6.c | 2 | ||||
-rw-r--r-- | net/ipv6/anycast.c | 35 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 14 | ||||
-rw-r--r-- | net/ipv6/esp6.c | 2 | ||||
-rw-r--r-- | net/ipv6/fib6_rules.c | 24 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 17 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 38 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 51 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_queue.c | 2 | ||||
-rw-r--r-- | net/ipv6/raw.c | 24 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 17 | ||||
-rw-r--r-- | net/ipv6/sit.c | 69 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 5 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 104 | ||||
-rw-r--r-- | net/ipv6/udp.c | 223 | ||||
-rw-r--r-- | net/ipv6/udplite.c | 1 |
18 files changed, 550 insertions, 336 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 918648409612..b1ce8fc62049 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) if (changed) dev_forward_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) @@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, hiscore->rule = -1; hiscore->ifa = NULL; - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { struct inet6_dev *idev; /* Candidate Source Address (section 4) @@ -1235,7 +1232,6 @@ try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); - read_unlock(&dev_base_lock); if (!hiscore->ifa) return -EADDRNOTAVAIL; @@ -3485,85 +3481,114 @@ enum addr_type_t ANYCAST_ADDR, }; +/* called with rcu_read_lock() */ +static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, + struct netlink_callback *cb, enum addr_type_t type, + int s_ip_idx, int *p_ip_idx) +{ + struct inet6_ifaddr *ifa; + struct ifmcaddr6 *ifmca; + struct ifacaddr6 *ifaca; + int err = 1; + int ip_idx = *p_ip_idx; + + read_lock_bh(&idev->lock); + switch (type) { + case UNICAST_ADDR: + /* unicast address incl. temp addr */ + for (ifa = idev->addr_list; ifa; + ifa = ifa->if_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDR, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + case MULTICAST_ADDR: + /* multicast address */ + for (ifmca = idev->mc_list; ifmca; + ifmca = ifmca->next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETMULTICAST, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + case ANYCAST_ADDR: + /* anycast address */ + for (ifaca = idev->ac_list; ifaca; + ifaca = ifaca->aca_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETANYCAST, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + default: + break; + } + read_unlock_bh(&idev->lock); + *p_ip_idx = ip_idx; + return err; +} + static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { + struct net *net = sock_net(skb->sk); + int h, s_h; int idx, ip_idx; int s_idx, s_ip_idx; - int err = 1; struct net_device *dev; - struct inet6_dev *idev = NULL; - struct inet6_ifaddr *ifa; - struct ifmcaddr6 *ifmca; - struct ifacaddr6 *ifaca; - struct net *net = sock_net(skb->sk); - - s_idx = cb->args[0]; - s_ip_idx = ip_idx = cb->args[1]; + struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - s_ip_idx = 0; - ip_idx = 0; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - read_lock_bh(&idev->lock); - switch (type) { - case UNICAST_ADDR: - /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWADDR, - NLM_F_MULTI); - } - break; - case MULTICAST_ADDR: - /* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_GETMULTICAST, - NLM_F_MULTI); - } - break; - case ANYCAST_ADDR: - /* anycast address */ - for (ifaca = idev->ac_list; ifaca; - ifaca = ifaca->aca_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_GETANYCAST, - NLM_F_MULTI); - } - break; - default: - break; - } - read_unlock_bh(&idev->lock); - in6_dev_put(idev); + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + s_ip_idx = ip_idx = cb->args[2]; - if (err <= 0) - break; + rcu_read_lock(); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + s_ip_idx = 0; + ip_idx = 0; + if ((idev = __in6_dev_get(dev)) == NULL) + goto cont; + + if (in6_dump_addrs(idev, skb, cb, type, + s_ip_idx, &ip_idx) <= 0) + goto done; cont: - idx++; + idx++; + } } - cb->args[0] = idx; - cb->args[1] = ip_idx; +done: + rcu_read_unlock(); + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = ip_idx; + return skb->len; } @@ -3827,28 +3852,39 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx, err; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; - read_lock(&dev_base_lock); - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - in6_dev_put(idev); - if (err <= 0) - break; + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rcu_read_lock(); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; + if (inet6_fill_ifinfo(skb, idev, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWLINK, NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - read_unlock(&dev_base_lock); - cb->args[0] = idx; +out: + rcu_read_unlock(); + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } @@ -4052,9 +4088,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -4062,9 +4097,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) if (changed) dev_disable_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) @@ -4464,7 +4498,7 @@ static int addrconf_init_net(struct net *net) all = &ipv6_devconf; dflt = &ipv6_devconf_dflt; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); if (all == NULL) goto err_alloc_all; @@ -4512,7 +4546,7 @@ static void addrconf_exit_net(struct net *net) __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); __addrconf_sysctl_unregister(net->ipv6.devconf_all); #endif - if (net != &init_net) { + if (!net_eq(net, &init_net)) { kfree(net->ipv6.devconf_dflt); kfree(net->ipv6.devconf_all); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b6d058818673..12e69d364dd5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct net *net, struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -158,7 +159,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; @@ -314,6 +315,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; + rcu_read_lock(); if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { @@ -326,12 +328,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; - goto out; + goto out_unlock; } - dev = dev_get_by_index(net, sk->sk_bound_dev_if); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; - goto out; + goto out_unlock; } } @@ -342,14 +344,11 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { - if (dev) - dev_put(dev); err = -EADDRNOTAVAIL; - goto out; + goto out_unlock; } } - if (dev) - dev_put(dev); + rcu_read_unlock(); } } @@ -381,6 +380,9 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) out: release_sock(sk); return err; +out_unlock: + rcu_read_unlock(); + goto out; } EXPORT_SYMBOL(inet6_bind); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0f526f8ea518..c2f300c314be 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -667,7 +667,7 @@ static int ah6_init_state(struct xfrm_state *x) } ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; + ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 1ae58bec1de0..f1c74c8ef9de 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, if (dev) return ipv6_chk_acast_dev(dev, addr); - read_lock(&dev_base_lock); - for_each_netdev(net, dev) + rcu_read_lock(); + for_each_netdev_rcu(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return found; } @@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im im = im->aca_next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifacaddr6 *im; - im = ac6_get_next(seq, v); + struct ifacaddr6 *im = ac6_get_next(seq, v); + ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct ac6_iter_state *state = ac6_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); + state->idev = NULL; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9f70452a69e7..e6f9cdf780fe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -537,12 +537,17 @@ int datagram_send_ctl(struct net *net, addr_type = __ipv6_addr_type(&src_info->ipi6_addr); + rcu_read_lock(); if (fl->oif) { - dev = dev_get_by_index(net, fl->oif); - if (!dev) + dev = dev_get_by_index_rcu(net, fl->oif); + if (!dev) { + rcu_read_unlock(); return -ENODEV; - } else if (addr_type & IPV6_ADDR_LINKLOCAL) + } + } else if (addr_type & IPV6_ADDR_LINKLOCAL) { + rcu_read_unlock(); return -EINVAL; + } if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; @@ -553,8 +558,7 @@ int datagram_send_ctl(struct net *net, ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } - if (dev) - dev_put(dev); + rcu_read_unlock(); if (err) goto exit_f; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index af597c73ebe9..668a46b655e6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -473,7 +473,7 @@ static int esp_init_authenc(struct xfrm_state *x) } err = crypto_aead_setauthsize( - aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + aead, x->aalg->alg_trunc_len / 8); if (err) goto free_key; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 00a7a5e4ac97..b7aa7c64cc4a 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -264,44 +264,36 @@ static struct fib_rules_ops fib6_rules_ops_template = { static int fib6_rules_net_init(struct net *net) { + struct fib_rules_ops *ops; int err = -ENOMEM; - net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template, - sizeof(*net->ipv6.fib6_rules_ops), - GFP_KERNEL); - if (!net->ipv6.fib6_rules_ops) - goto out; + ops = fib_rules_register(&fib6_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.fib6_rules_ops = ops; - net->ipv6.fib6_rules_ops->fro_net = net; - INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list); err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); + RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) - goto out_fib6_default_rule_add; + goto out_fib6_rules_ops; - err = fib_rules_register(net->ipv6.fib6_rules_ops); - if (err) - goto out_fib6_default_rule_add; out: return err; -out_fib6_default_rule_add: - fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops); out_fib6_rules_ops: - kfree(net->ipv6.fib6_rules_ops); + fib_rules_unregister(ops); goto out; } static void fib6_rules_net_exit(struct net *net) { fib_rules_unregister(net->ipv6.fib6_rules_ops); - kfree(net->ipv6.fib6_rules_ops); } static struct pernet_operations fib6_rules_net_ops = { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7712578bdc66..6e7bffa2205e 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -67,7 +67,7 @@ static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) struct ip6_flowlabel *fl; for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { - if (fl->label == label && fl->fl_net == net) + if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } return NULL; @@ -163,7 +163,8 @@ static void ip6_fl_purge(struct net *net) struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; while ((fl = *flp) != NULL) { - if (fl->fl_net == net && atomic_read(&fl->users) == 0) { + if (net_eq(fl->fl_net, net) && + atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); atomic_dec(&fl_size); @@ -377,8 +378,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, goto done; fl->share = freq->flr_share; addr_type = ipv6_addr_type(&freq->flr_dst); - if ((addr_type&IPV6_ADDR_MAPPED) - || addr_type == IPV6_ADDR_ANY) { + if ((addr_type & IPV6_ADDR_MAPPED) || + addr_type == IPV6_ADDR_ANY) { err = -EINVAL; goto done; } @@ -421,8 +422,8 @@ static int mem_check(struct sock *sk) if (room <= 0 || ((count >= FL_MAX_PER_SOCK || - (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) - && !capable(CAP_NET_ADMIN))) + (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && + !capable(CAP_NET_ADMIN))) return -ENOBUFS; return 0; @@ -630,7 +631,7 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { fl = fl_ht[state->bucket]; - while (fl && fl->fl_net != net) + while (fl && !net_eq(fl->fl_net, net)) fl = fl->next; if (fl) break; @@ -645,7 +646,7 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo fl = fl->next; try_again: - while (fl && fl->fl_net != net) + while (fl && !net_eq(fl->fl_net, net)) fl = fl->next; while (!fl) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6c1b5c98e818..d453d07b0dfe 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -78,7 +78,7 @@ static void ip6_fb_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); -static int ip6_tnl_net_id; +static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; @@ -658,6 +658,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +/* called with rcu_read_lock() */ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) { struct ip6_tnl_parm *p = &t->parms; @@ -668,15 +669,13 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(net, p->link); + ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) ret = 1; - if (ldev) - dev_put(ldev); } return ret; } @@ -804,8 +803,9 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) if (p->flags & IP6_TNL_F_CAP_XMIT) { struct net_device *ldev = NULL; + rcu_read_lock(); if (p->link) - ldev = dev_get_by_index(net, p->link); + ldev = dev_get_by_index_rcu(net, p->link); if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) printk(KERN_WARNING @@ -819,8 +819,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) p->name); else ret = 1; - if (ldev) - dev_put(ldev); + rcu_read_unlock(); } return ret; } @@ -1410,17 +1409,8 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) static int ip6_tnl_init_net(struct net *net) { + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; - struct ip6_tnl_net *ip6n; - - err = -ENOMEM; - ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL); - if (ip6n == NULL) - goto err_alloc; - - err = net_assign_generic(net, ip6_tnl_net_id, ip6n); - if (err < 0) - goto err_assign; ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; @@ -1443,27 +1433,23 @@ static int ip6_tnl_init_net(struct net *net) err_register: free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(ip6n); -err_alloc: return err; } static void ip6_tnl_exit_net(struct net *net) { - struct ip6_tnl_net *ip6n; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - ip6n = net_generic(net, ip6_tnl_net_id); rtnl_lock(); ip6_tnl_destroy_tunnels(ip6n); rtnl_unlock(); - kfree(ip6n); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, .exit = ip6_tnl_exit_net, + .id = &ip6_tnl_net_id, + .size = sizeof(struct ip6_tnl_net), }; /** @@ -1488,7 +1474,7 @@ static int __init ip6_tunnel_init(void) goto unreg_ip4ip6; } - err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops); + err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto err_pernet; return 0; @@ -1512,7 +1498,7 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); - unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops); + unregister_pernet_device(&ip6_tnl_net_ops); } module_init(ip6_tunnel_init); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f9fcf690bd5d..1f9c44442e65 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return igmp6_mc_get_idx(seq, *pos); } static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifmcaddr6 *im; - im = igmp6_mc_get_next(seq, v); + struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v); + ++*pos; return im; } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mc_seq_show(struct seq_file *seq, void *v) @@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; read_lock_bh(&idev->lock); @@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) spin_unlock_bh(&im->mca_lock); } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return psf; } @@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im != NULL)) { @@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) } if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 47a3623e7119..db4d5725cce8 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -501,7 +501,7 @@ ipq_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { write_lock_bh(&queue_lock); - if ((n->net == &init_net) && (n->pid == peer_pid)) + if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cb834ab7f071..926ce8eeffaf 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -249,7 +249,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Raw sockets are IPv6 only */ if (addr_type == IPV6_ADDR_MAPPED) - return(-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; lock_sock(sk); @@ -257,6 +257,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE) goto out; + rcu_read_lock(); /* Check if the address belongs to the host. */ if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; @@ -272,13 +273,13 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) - goto out; + goto out_unlock; - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - if (!dev) { - err = -ENODEV; - goto out; - } + err = -ENODEV; + dev = dev_get_by_index_rcu(sock_net(sk), + sk->sk_bound_dev_if); + if (!dev) + goto out_unlock; } /* ipv4 addr of the socket is invalid. Only the @@ -289,13 +290,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { - if (dev) - dev_put(dev); - goto out; + goto out_unlock; } } - if (dev) - dev_put(dev); } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; @@ -303,6 +300,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) ipv6_addr_copy(&np->saddr, &addr->sin6_addr); err = 0; +out_unlock: + rcu_read_unlock(); out: release_sock(sk); return err; @@ -1336,7 +1335,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index da5bd0ed83df..45efc39753e2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -208,18 +208,17 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); net = container_of(fq->q.net, struct net, ipv6.frags); - dev = dev_get_by_index(net, fq->iif); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) - goto out; + goto out_rcu_unlock; - rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) - goto out; + goto out_rcu_unlock; /* But use as source device on which LAST ARRIVED @@ -228,9 +227,9 @@ static void ip6_frag_expire(unsigned long data) */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); +out_rcu_unlock: + rcu_read_unlock(); out: - if (dev) - dev_put(dev); spin_unlock(&fq->q.lock); fq_put(fq); } @@ -682,7 +681,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net) struct ctl_table_header *hdr; table = ip6_frags_ns_ctl_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -700,7 +699,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2362a3397e91..976e68244b99 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -66,7 +66,7 @@ static void ipip6_fb_tunnel_init(struct net_device *dev); static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); -static int sit_net_id; +static int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel *tunnels_r_l[HASH_SIZE]; struct ip_tunnel *tunnels_r[HASH_SIZE]; @@ -637,6 +637,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; + __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ @@ -726,25 +727,28 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - if (tiph->frag_off) + if (df) { mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - if (mtu < 68) { - stats->collisions++; - ip_rt_put(rt); - goto tx_error; - } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + if (mtu < 68) { + stats->collisions++; + ip_rt_put(rt); + goto tx_error; + } - if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); - ip_rt_put(rt); - goto tx_error; + if (mtu < IPV6_MIN_MTU) { + mtu = IPV6_MIN_MTU; + df = 0; + } + + if (tunnel->parms.iph.daddr && skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } } if (tunnel->err_count > 0) { @@ -792,11 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; - if (mtu > IPV6_MIN_MTU) - iph->frag_off = tiph->frag_off; - else - iph->frag_off = 0; - + iph->frag_off = df; iph->protocol = IPPROTO_IPV6; iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; @@ -1164,17 +1164,8 @@ static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) static int sit_init_net(struct net *net) { + struct sit_net *sitn = net_generic(net, sit_net_id); int err; - struct sit_net *sitn; - - err = -ENOMEM; - sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL); - if (sitn == NULL) - goto err_alloc; - - err = net_assign_generic(net, sit_net_id, sitn); - if (err < 0) - goto err_assign; sitn->tunnels[0] = sitn->tunnels_wc; sitn->tunnels[1] = sitn->tunnels_l; @@ -1201,37 +1192,33 @@ err_reg_dev: dev_put(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(sitn); -err_alloc: return err; } static void sit_exit_net(struct net *net) { - struct sit_net *sitn; + struct sit_net *sitn = net_generic(net, sit_net_id); LIST_HEAD(list); - sitn = net_generic(net, sit_net_id); rtnl_lock(); sit_destroy_tunnels(sitn, &list); unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); unregister_netdevice_many(&list); rtnl_unlock(); - kfree(sitn); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, .exit = sit_exit_net, + .id = &sit_net_id, + .size = sizeof(struct sit_net), }; static void __exit sit_cleanup(void) { xfrm4_tunnel_deregister(&sit_handler, AF_INET6); - unregister_pernet_gen_device(sit_net_id, &sit_net_ops); + unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } @@ -1246,7 +1233,7 @@ static int __init sit_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&sit_net_id, &sit_net_ops); + err = register_pernet_device(&sit_net_ops); if (err < 0) xfrm4_tunnel_deregister(&sit_handler, AF_INET6); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 612fc53e0bb9..5b9af508b8f2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -254,7 +255,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0, dst); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); if (tcp_opt.saw_tstamp) cookie_check_timestamp(&tcp_opt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 34925f089e07..aadd7cef73b3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -461,7 +461,8 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -499,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -1161,13 +1162,15 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); - struct request_sock *req = NULL; - __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = __sk_dst_get(sk); + __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1205,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *d; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_free; + + /* Secret recipe starts with IP addresses */ + d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; - tcp_parse_options(skb, &tmp_opt, 0, dst); +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_free; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1239,23 +1286,21 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } - tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req)) - goto drop; + if (tcp_v6_send_synack(sk, req, + (struct request_values *)&tmp_ext) || + want_cookie) + goto drop_and_free; - if (!want_cookie) { - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; - } + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + return 0; +drop_and_free: + reqsk_free(req); drop: - if (req) - reqsk_free(req); - return 0; /* don't send reset */ } @@ -1851,7 +1896,7 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; @@ -1867,6 +1912,19 @@ static int tcp_v6_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv6_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -2112,7 +2170,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, @@ -2127,12 +2184,17 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); +} + +static void tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { - .init = tcpv6_net_init, - .exit = tcpv6_net_exit, + .init = tcpv6_net_init, + .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d3b59d73f507..69ebdbe78c47 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -81,9 +81,33 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } +static unsigned int udp6_portaddr_hash(struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word(addr6->s6_addr32[3], mix); + else + hash = jhash2(addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + + int udp_v6_get_port(struct sock *sk, unsigned short snum) { - return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); + unsigned int hash2_nulladdr = + udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); + unsigned int hash2_partial = + udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } static inline int compute_score(struct sock *sk, struct net *net, @@ -94,7 +118,7 @@ static inline int compute_score(struct sock *sk, struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -124,6 +148,86 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +#define SCORE2_MAX (1 + 1 + 1) +static inline int compute_score2(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score = 0; + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + + +/* called with read_rcu_lock() */ +static struct sock *udp6_lib_lookup2(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned int hnum, int dif, + struct udp_hslot *hslot2, unsigned int slot2) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + int score, badness; + +begin: + result = NULL; + badness = -1; + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + score = compute_score2(sk, net, saddr, sport, + daddr, hnum, dif); + if (score > badness) { + result = sk; + badness = score; + if (score == SCORE2_MAX) + goto exact_match; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot2) + goto begin; + + if (result) { +exact_match: + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score2(result, net, saddr, sport, + daddr, hnum, dif) < badness)) { + sock_put(result); + goto begin; + } + } + return result; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, @@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - unsigned int hash = udp_hashfn(net, hnum, udptable->mask); - struct udp_hslot *hslot = &udptable->hash[hash]; + unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); + struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness; rcu_read_lock(); + if (hslot->count > 10) { + hash2 = udp6_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp6_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, + hslot2, slot2); + if (!result) { + hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp6_lib_lookup2(net, &in6addr_any, sport, + daddr, hnum, dif, + hslot2, slot2); + } + rcu_read_unlock(); + return result; + } begin: result = NULL; badness = -1; @@ -152,7 +280,7 @@ begin: * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ - if (get_nulls_value(node) != hash) + if (get_nulls_value(node) != slot) goto begin; if (result) { @@ -288,9 +416,7 @@ try_again: err = ulen; out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + skb_free_datagram_locked(sk, skb); out: return err; @@ -417,7 +543,8 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (!net_eq(sock_net(s), net)) continue; - if (s->sk_hash == num && s->sk_family == PF_INET6) { + if (udp_sk(s)->udp_port_hash == num && + s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); if (inet->inet_dport) { if (inet->inet_dport != rmt_port) @@ -442,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, return NULL; } +static void flush_stack(struct sock **stack, unsigned int count, + struct sk_buff *skb, unsigned int final) +{ + unsigned int i; + struct sock *sk; + struct sk_buff *skb1; + + for (i = 0; i < count; i++) { + skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + + sk = stack[i]; + if (skb1) { + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + } else { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); + } + } +} /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. @@ -450,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, struct udp_table *udptable) { - struct sock *sk, *sk2; + struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); int dif; + unsigned int i, count = 0; spin_lock(&hslot->lock); sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - if (!sk) { - kfree_skb(skb); - goto out; - } - - sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, - uh->source, saddr, dif))) { - struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) { - bh_lock_sock(sk2); - if (!sock_owned_by_user(sk2)) - udpv6_queue_rcv_skb(sk2, buff); - else - sk_add_backlog(sk2, buff); - bh_unlock_sock(sk2); + while (sk) { + stack[count++] = sk; + sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, + uh->source, saddr, dif); + if (unlikely(count == ARRAY_SIZE(stack))) { + if (!sk) + break; + flush_stack(stack, count, skb, ~0); + count = 0; } } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); -out: + /* + * before releasing the lock, we must take reference on sockets + */ + for (i = 0; i < count; i++) + sock_hold(stack[i]); + spin_unlock(&hslot->lock); + + if (count) { + flush_stack(stack, count, skb, count - 1); + + for (i = 0; i < count; i++) + sock_put(stack[i]); + } else { + kfree_skb(skb); + } return 0; } @@ -1286,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .capability =-1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index d737a27ee010..6ea6938919e6 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; |