Diffstat (limited to 'net/ipv4')
34 files changed, 386 insertions, 301 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 91d3d96805d0..b12dae2b0b2d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1029,6 +1029,11 @@ skip: } } +static inline bool inetdev_valid_mtu(unsigned mtu) +{ + return mtu >= 68; +} + /* Called only under RTNL semaphore */ static int inetdev_event(struct notifier_block *this, unsigned long event, @@ -1048,6 +1053,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, IN_DEV_CONF_SET(in_dev, NOXFRM, 1); IN_DEV_CONF_SET(in_dev, NOPOLICY, 1); } + } else if (event == NETDEV_CHANGEMTU) { + /* Re-enabling IP */ + if (inetdev_valid_mtu(dev->mtu)) + in_dev = inetdev_init(dev); } goto out; } @@ -1058,7 +1067,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, dev->ip_ptr = NULL; break; case NETDEV_UP: - if (dev->mtu < 68) + if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { struct in_ifaddr *ifa; @@ -1080,9 +1089,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_down(in_dev); break; case NETDEV_CHANGEMTU: - if (dev->mtu >= 68) + if (inetdev_valid_mtu(dev->mtu)) break; - /* MTU falled under 68, disable IP */ + /* disable IP when MTU is not enough */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4e73e5708e70..21515d4c49eb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -575,7 +575,7 @@ static int esp_init_state(struct xfrm_state *x) crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); - else if (x->props.mode == XFRM_MODE_BEET) + else if (x->props.mode == XFRM_MODE_BEET && x->sel.family != AF_INET6) x->props.header_len += IPV4_BEET_PHMAXLEN; if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 860558633b2c..55c355e63234 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -204,18 +204,22 @@ static struct sock *icmp_sk(struct net *net) return net->ipv4.icmp_sk[smp_processor_id()]; } -static inline int icmp_xmit_lock(struct sock *sk) +static inline struct sock *icmp_xmit_lock(struct net *net) { + struct sock *sk; + local_bh_disable(); + sk = icmp_sk(net); + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. */ local_bh_enable(); - return 1; + return NULL; } - return 0; + return sk; } static inline void icmp_xmit_unlock(struct sock *sk) @@ -354,15 +358,17 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb->rtable; struct net *net = dev_net(rt->u.dst.dev); - struct sock *sk = icmp_sk(net); - struct inet_sock *inet = inet_sk(sk); + struct sock *sk; + struct inet_sock *inet; __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; + inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; @@ -419,7 +425,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; net = dev_net(rt->u.dst.dev); - sk = icmp_sk(net); /* * Find the original header. It is expected to be valid, of course. 
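The icmp.c hunks above invert the locking API: instead of the caller fetching the per-net ICMP socket and then calling icmp_xmit_lock(sk), the helper now takes the netns, resolves the socket itself under local_bh_disable(), and returns it locked — or NULL when the trylock fails (the dst_link_failure() re-entry case noted in the comment). A minimal userspace analogue of that shape, with invented names, not kernel code:

	#include <pthread.h>
	#include <stddef.h>

	struct toy_net {
		pthread_spinlock_t slock;
		int icmp_sk;		/* stands in for the per-CPU ICMP socket */
	};

	/* Resolve and lock in one step; NULL means "give up silently",
	 * which is exactly how icmp_reply()/icmp_send() treat a failed
	 * trylock. */
	static struct toy_net *xmit_lock(struct toy_net *net)
	{
		if (pthread_spin_trylock(&net->slock) != 0)
			return NULL;
		return net;
	}

	static void xmit_unlock(struct toy_net *net)
	{
		pthread_spin_unlock(&net->slock);
	}

	/* Caller pattern after the patch:
	 *
	 *	struct toy_net *n = xmit_lock(net);
	 *	if (n == NULL)
	 *		return;
	 *	... build and transmit the ICMP message ...
	 *	xmit_unlock(n);
	 */

Returning the locked object closes the window where icmp_sk() was read outside the lock, and it removes the now-pointless early lookup that the icmp_send() hunk deletes.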
@@ -483,7 +488,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } - if (icmp_xmit_lock(sk)) + sk = icmp_xmit_lock(net); + if (sk == NULL) return; /* diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6203ece53606..f70fac612596 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -289,6 +289,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct rtable *rt; struct iphdr *pip; struct igmpv3_report *pig; + struct net *net = dev_net(dev); skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) @@ -299,7 +300,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); return NULL; } @@ -629,6 +630,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; + struct net *net = dev_net(dev); __be32 group = pmc ? pmc->multiaddr : 0; __be32 dst; @@ -643,7 +645,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct flowi fl = { .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) return -1; } if (rt->rt_src == 0) { @@ -1196,9 +1198,6 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (im=in_dev->mc_list; im; im=im->next) { if (im->multiaddr == addr) { im->users++; @@ -1278,9 +1277,6 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { if (i->multiaddr==addr) { if (--i->users == 0) { @@ -1308,9 +1304,6 @@ void ip_mc_down(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - for (i=in_dev->mc_list; i; i=i->next) igmp_group_dropped(i); @@ -1331,9 +1324,6 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST in_dev->mr_gq_running = 0; @@ -1357,9 +1347,6 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev->mc_list; i; i=i->next) @@ -1376,9 +1363,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) ASSERT_RTNL(); - if (!net_eq(dev_net(in_dev->dev), &init_net)) - return; - /* Deactivate timers */ ip_mc_down(in_dev); @@ -1395,7 +1379,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_unlock_bh(&in_dev->mc_list_lock); } -static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) +static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } }; @@ -1404,19 +1388,19 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(&init_net, imr->imr_ifindex); + idev = inetdev_by_index(net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(&init_net, imr->imr_address.s_addr); + dev = ip_dev_find(net, 
imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); } - if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) { + if (!dev && !ip_route_output_key(net, &rt, &fl)) { dev = rt->u.dst.dev; ip_rt_put(rt); } @@ -1754,18 +1738,16 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int ifindex; int count = 0; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); if (!in_dev) { iml = NULL; @@ -1827,15 +1809,13 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; + struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); - in_dev = ip_mc_find_dev(imr); + in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { if (iml->multi.imr_multiaddr.s_addr != group) @@ -1873,21 +1853,19 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, j, rv; if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2007,6 +1985,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; if (!ipv4_is_multicast(addr)) @@ -2015,15 +1994,12 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = ifindex; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2094,19 +2070,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + struct net *net = sock_net(sk); if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; - in_dev = ip_mc_find_dev(&imr); + in_dev = ip_mc_find_dev(net, &imr); if (!in_dev) { err = -ENODEV; @@ -2163,9 +2137,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - if (!net_eq(sock_net(sk), &init_net)) - return -EPROTONOSUPPORT; - rtnl_lock(); err = -EADDRNOTAVAIL; @@ -2246,19 +2217,17 @@ void ip_mc_drop_socket(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; + struct net 
*net = sock_net(sk); if (inet->mc_list == NULL) return; - if (!net_eq(sock_net(sk), &init_net)) - return; - rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d985bd613d25..743f011b9a84 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -409,3 +409,38 @@ out: } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index 1f1897a1a702..201b8ea3020d 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -608,7 +608,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, } -int ip_vs_app_init(void) +int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index f8bdae47a77f..44a6872dc245 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -965,7 +965,7 @@ static void ip_vs_conn_flush(void) } -int ip_vs_conn_init(void) +int __init ip_vs_conn_init(void) { int idx; diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9a5ace0b4dd6..6379705a8dcb 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -683,9 +683,22 @@ static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(stats, 0, (char *)&stats->lock - (char *)stats); - spin_unlock_bh(&stats->lock); + + stats->conns = 0; + stats->inpkts = 0; + stats->outpkts = 0; + stats->inbytes = 0; + stats->outbytes = 0; + + stats->cps = 0; + stats->inpps = 0; + stats->outpps = 0; + stats->inbps = 0; + stats->outbps = 0; + ip_vs_zero_estimator(stats); + + spin_unlock_bh(&stats->lock); } /* @@ -1589,7 +1602,7 @@ static struct ctl_table vs_vars[] = { { .ctl_name = 0 } }; -struct ctl_path net_vs_ctl_path[] = { +const struct ctl_path net_vs_ctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, { .procname = "vs", }, @@ -1784,7 +1797,9 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats; +struct ip_vs_stats ip_vs_stats = { + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), +}; #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) @@ -2306,7 +2321,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { }; -int 
ip_vs_control_init(void) +int __init ip_vs_control_init(void) { int ret; int idx; @@ -2333,8 +2348,6 @@ int ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_rtable[idx]); } - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); - spin_lock_init(&ip_vs_stats.lock); ip_vs_new_estimator(&ip_vs_stats); /* Hook the defense timer */ diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 8afc1503ed20..fa66824d264f 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -233,6 +233,7 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .name = "dh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, @@ -242,7 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = static int __init ip_vs_dh_init(void) { - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_dh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index bc04eedd6dbb..5a20f93bd7f9 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> +#include <linux/list.h> #include <net/ip_vs.h> @@ -44,28 +45,11 @@ */ -struct ip_vs_estimator -{ - struct ip_vs_estimator *next; - struct ip_vs_stats *stats; - - u32 last_conns; - u32 last_inpkts; - u32 last_outpkts; - u64 last_inbytes; - u64 last_outbytes; - - u32 cps; - u32 inpps; - u32 outpps; - u32 inbps; - u32 outbps; -}; - +static void estimation_timer(unsigned long arg); -static struct ip_vs_estimator *est_list = NULL; -static DEFINE_RWLOCK(est_lock); -static struct timer_list est_timer; +static LIST_HEAD(est_list); +static DEFINE_SPINLOCK(est_lock); +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); static void estimation_timer(unsigned long arg) { @@ -76,9 +60,9 @@ static void estimation_timer(unsigned long arg) u64 n_inbytes, n_outbytes; u32 rate; - read_lock(&est_lock); - for (e = est_list; e; e = e->next) { - s = e->stats; + spin_lock(&est_lock); + list_for_each_entry(e, &est_list, list) { + s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); n_conns = s->conns; @@ -114,19 +98,16 @@ static void estimation_timer(unsigned long arg) s->outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } - read_unlock(&est_lock); + spin_unlock(&est_lock); mod_timer(&est_timer, jiffies + 2*HZ); } -int ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est; + struct ip_vs_estimator *est = &stats->est; - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOMEM; + INIT_LIST_HEAD(&est->list); - est->stats = stats; est->last_conns = stats->conns; est->cps = stats->cps<<10; @@ -142,59 +123,40 @@ int ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->outbytes; est->outbps = stats->outbps<<5; - write_lock_bh(&est_lock); - est->next = est_list; - if (est->next == NULL) { - setup_timer(&est_timer, estimation_timer, 0); - est_timer.expires = jiffies + 2*HZ; - add_timer(&est_timer); - } - est_list = est; - write_unlock_bh(&est_lock); - return 0; + spin_lock_bh(&est_lock); + if (list_empty(&est_list)) + mod_timer(&est_timer, jiffies + 2 * HZ); + list_add(&est->list, &est_list); + spin_unlock_bh(&est_lock); } void ip_vs_kill_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *est, **pest; - int killed = 0; - - 
write_lock_bh(&est_lock); - pest = &est_list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - *pest = est->next; - kfree(est); - killed++; + struct ip_vs_estimator *est = &stats->est; + + spin_lock_bh(&est_lock); + list_del(&est->list); + while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { + spin_unlock_bh(&est_lock); + cpu_relax(); + spin_lock_bh(&est_lock); } - if (killed && est_list == NULL) - del_timer_sync(&est_timer); - write_unlock_bh(&est_lock); + spin_unlock_bh(&est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) { - struct ip_vs_estimator *e; - - write_lock_bh(&est_lock); - for (e = est_list; e; e = e->next) { - if (e->stats != stats) - continue; - - /* set counters zero */ - e->last_conns = 0; - e->last_inpkts = 0; - e->last_outpkts = 0; - e->last_inbytes = 0; - e->last_outbytes = 0; - e->cps = 0; - e->inpps = 0; - e->outpps = 0; - e->inbps = 0; - e->outbps = 0; - } - write_unlock_bh(&est_lock); + struct ip_vs_estimator *est = &stats->est; + + /* set counters zero, caller must hold the stats->lock lock */ + est->last_inbytes = 0; + est->last_outbytes = 0; + est->last_conns = 0; + est->last_inpkts = 0; + est->last_outpkts = 0; + est->cps = 0; + est->inpps = 0; + est->outpps = 0; + est->inbps = 0; + est->outbps = 0; } diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 0efa3db4b180..7a6a319f544a 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -539,6 +539,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .name = "lblc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .update_service = ip_vs_lblc_update_svc, @@ -550,7 +551,6 @@ static int __init ip_vs_lblc_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 8e3bbeb45138..c234e73968a6 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -728,6 +728,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .name = "lblcr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .update_service = ip_vs_lblcr_update_svc, @@ -739,7 +740,6 @@ static int __init ip_vs_lblcr_init(void) { int ret; - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ac9f08e065d5..ebcdbf75ac65 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c @@ -98,6 +98,7 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .name = "lc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), .init_service = ip_vs_lc_init_svc, .done_service = ip_vs_lc_done_svc, .update_service = ip_vs_lc_update_svc, @@ -107,7 +108,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { static int __init ip_vs_lc_init(void) { - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; } diff --git a/net/ipv4/ipvs/ip_vs_nq.c 
b/net/ipv4/ipvs/ip_vs_nq.c index a46bf258d420..92f3a6770031 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c @@ -136,6 +136,7 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .name = "nq", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), .init_service = ip_vs_nq_init_svc, .done_service = ip_vs_nq_done_svc, .update_service = ip_vs_nq_update_svc, @@ -145,7 +146,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = static int __init ip_vs_nq_init(void) { - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_nq_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 876714f23d65..6099a88fc200 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -43,7 +43,7 @@ static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; /* * register an ipvs protocol */ -static int __used register_ip_vs_protocol(struct ip_vs_protocol *pp) +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) { unsigned hash = IP_VS_PROTO_HASH(pp->protocol); @@ -190,7 +190,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, } -int ip_vs_protocol_init(void) +int __init ip_vs_protocol_init(void) { char protocols[64]; #define REGISTER_PROTOCOL(p) \ diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index c8db12d39e61..358110d17e59 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c @@ -94,6 +94,7 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, @@ -102,7 +103,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { static int __init ip_vs_rr_init(void) { - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c index b64767309855..a46ad9e35016 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/ipv4/ipvs/ip_vs_sched.c @@ -184,7 +184,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next != &scheduler->n_list) { + if (!list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " @@ -229,7 +229,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } write_lock_bh(&__ip_vs_sched_lock); - if (scheduler->n_list.next == &scheduler->n_list) { + if (list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " "is not in the list. 
failed\n", scheduler->name); diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 2a7d31358181..77663d84cbd1 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c @@ -138,6 +138,7 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .name = "sed", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), .init_service = ip_vs_sed_init_svc, .done_service = ip_vs_sed_done_svc, .update_service = ip_vs_sed_update_svc, @@ -147,7 +148,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = static int __init ip_vs_sed_init(void) { - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sed_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index b8fdfac65001..7b979e228056 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -230,6 +230,7 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, @@ -239,7 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = static int __init ip_vs_sh_init(void) { - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 45e9bd96c286..a652da2c3200 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -904,9 +904,9 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. */ - spin_lock(&ip_vs_sync_lock); + spin_lock_bh(&ip_vs_sync_lock); ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock(&ip_vs_sync_lock); + spin_unlock_bh(&ip_vs_sync_lock); kthread_stop(sync_master_thread); sync_master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 772c3cb4eca1..9b0ef86bb1f7 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c @@ -126,6 +126,7 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .name = "wlc", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), .init_service = ip_vs_wlc_init_svc, .done_service = ip_vs_wlc_done_svc, .update_service = ip_vs_wlc_update_svc, @@ -135,7 +136,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = static int __init ip_vs_wlc_init(void) { - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); } diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 1d6932d7dc97..0d86a79b87b5 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c @@ -212,6 +212,7 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .name = "wrr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, @@ -220,7 +221,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { static int __init ip_vs_wrr_init(void) { - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list); return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1819ad7ab910..fafe8ebb4c55 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ 
-475,11 +475,10 @@ static void arp_print(struct arp_payload *payload) #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 49587a497229..462a22c97877 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(dev, iph->daddr, info->dest) ^ - (info->flags & IPT_ADDRTYPE_INVERT_DEST); + !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); return ret; } diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 21cb053f5d7d..3974d7cae5c0 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -305,10 +305,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); - recent_table_flush(t); #ifdef CONFIG_PROC_FS remove_proc_entry(t->name, proc_dir); #endif + recent_table_flush(t); kfree(t); } mutex_unlock(&recent_mutex); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 91537f11273f..6c4f11f51446 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -73,9 +73,13 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - off = *rover; if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) - off = net_random(); + off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, + maniptype == IP_NAT_MANIP_SRC + ? 
tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; for (i = 0; i < range_size; i++, off++) { *portptr = htons(min + off % range_size); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 834356ea99df..8f5a403f6f6b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -232,6 +232,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), + SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 380d6474cf66..6ee5354c9aa1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1509,14 +1509,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && + old_mtu >= dst_mtu(&rth->u.dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { - if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu <= dst_mtu(&rth->u.dst)) { + if (mtu < dst_mtu(&rth->u.dst)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1538,7 +1538,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && + if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1667,7 +1667,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) + if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, @@ -2914,6 +2914,68 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, return 0; } +static void rt_secret_reschedule(int old) +{ + struct net *net; + int new = ip_rt_secret_interval; + int diff = new - old; + + if (!diff) + return; + + rtnl_lock(); + for_each_net(net) { + int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); + + if (!new) + continue; + + if (deleted) { + long time = net->ipv4.rt_secret_timer.expires - jiffies; + + if (time <= 0 || (time += diff) <= 0) + time = 0; + + net->ipv4.rt_secret_timer.expires = time; + } else + net->ipv4.rt_secret_timer.expires = new; + + net->ipv4.rt_secret_timer.expires += jiffies; + add_timer(&net->ipv4.rt_secret_timer); + } + rtnl_unlock(); +} + +static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, + struct file *filp, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old = ip_rt_secret_interval; + int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + + rt_secret_reschedule(old); + + return ret; +} + +static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, + int __user *name, + int nlen, + void __user *oldval, + size_t __user *oldlenp, + void __user *newval, + size_t newlen) +{ + int old = ip_rt_secret_interval; + int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, + newlen); + + 
rt_secret_reschedule(old); + + return ret; +} + static ctl_table ipv4_route_table[] = { { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, @@ -3048,20 +3110,29 @@ static ctl_table ipv4_route_table[] = { .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = &ipv4_sysctl_rt_secret_interval, + .strategy = &ipv4_sysctl_rt_secret_interval_strategy, }, { .ctl_name = 0 } }; -static __net_initdata struct ctl_path ipv4_route_path[] = { +static struct ctl_table empty[1]; + +static struct ctl_table ipv4_skeleton[] = +{ + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + .mode = 0555, .child = ipv4_route_table}, + { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + .mode = 0555, .child = empty}, + { } +}; + +static __net_initdata struct ctl_path ipv4_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, { }, }; - static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = NET_IPV4_ROUTE_FLUSH, @@ -3074,6 +3145,13 @@ static struct ctl_table ipv4_route_flush_table[] = { { .ctl_name = 0 }, }; +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; @@ -3126,10 +3204,12 @@ static __net_init int rt_secret_timer_init(struct net *net) net->ipv4.rt_secret_timer.data = (unsigned long)net; init_timer_deferrable(&net->ipv4.rt_secret_timer); - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } return 0; } @@ -3216,14 +3296,16 @@ int __init ip_rt_init(void) return rc; } +#ifdef CONFIG_SYSCTL /* * We really need to sanitize the damn ipv4 init order, then all * this nonsense will go away. 
*/ void __init ip_static_sysctl_init(void) { - register_sysctl_paths(ipv4_route_path, ipv4_route_table); + register_sysctl_paths(ipv4_path, ipv4_skeleton); } +#endif EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 770d827f5ab8..e0689fd7b798 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -232,6 +232,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &ipv4_doint_and_flush, .strategy = &ipv4_doint_and_flush_strategy, + .extra2 = &init_net, }, { .ctl_name = NET_IPV4_NO_PMTU_DISC, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a2b06d0cc26b..1b4fee20fc93 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -655,8 +655,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, &rep.th); + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, @@ -687,14 +687,14 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, +static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); } /* @@ -1116,18 +1116,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) return 0; if (hash_expected && !hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); return 1; } if (!hash_expected && hash_location) { - LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " - "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); return 1; } @@ -2382,6 +2376,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 204c42162660..f976fc57892c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -609,7 +609,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_ack(skb, req); + req->rsk_ops->send_ack(sk, skb, req); if (paws_reject) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); return NULL; @@ -618,89 +618,87 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* In sequence, PAWS is OK. 
*/ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) - req->ts_recent = tmp_opt.rcv_tsval; + req->ts_recent = tmp_opt.rcv_tsval; - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { - /* Truncate SYN, it is out of window starting - at tcp_rsk(req)->rcv_isn + 1. */ - flg &= ~TCP_FLAG_SYN; - } + if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { + /* Truncate SYN, it is out of window starting + at tcp_rsk(req)->rcv_isn + 1. */ + flg &= ~TCP_FLAG_SYN; + } - /* RFC793: "second check the RST bit" and - * "fourth, check the SYN bit" - */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); - goto embryonic_reset; - } + /* RFC793: "second check the RST bit" and + * "fourth, check the SYN bit" + */ + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + goto embryonic_reset; + } - /* ACK sequence verified above, just make sure ACK is - * set. If ACK not set, just silently drop the packet. - */ - if (!(flg & TCP_FLAG_ACK)) - return NULL; - - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_rsk(req)->acked = 1; - return NULL; - } + /* ACK sequence verified above, just make sure ACK is + * set. If ACK not set, just silently drop the packet. + */ + if (!(flg & TCP_FLAG_ACK)) + return NULL; - /* OK, ACK is valid, create big socket and - * feed this segment to it. It will repeat all - * the tests. THIS SEGMENT MUST MOVE SOCKET TO - * ESTABLISHED STATE. If it will be dropped after - * socket is created, wait for troubles. - */ - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, - req, NULL); - if (child == NULL) - goto listen_overflow; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + + /* OK, ACK is valid, create big socket and + * feed this segment to it. It will repeat all + * the tests. THIS SEGMENT MUST MOVE SOCKET TO + * ESTABLISHED STATE. If it will be dropped after + * socket is created, wait for troubles. + */ + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; #ifdef CONFIG_TCP_MD5SIG - else { - /* Copy over the MD5 key from the original socket */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - key = tp->af_specific->md5_lookup(sk, child); - if (key != NULL) { - /* - * We're using one, so create a matching key on the - * newsk structure. If we fail to get memory then we - * end up not copying the key across. Shucks. - */ - char *newkey = kmemdup(key->key, key->keylen, - GFP_ATOMIC); - if (newkey) { - if (!tcp_alloc_md5sig_pool()) - BUG(); - tp->af_specific->md5_add(child, child, - newkey, - key->keylen); - } + else { + /* Copy over the MD5 key from the original socket */ + struct tcp_md5sig_key *key; + struct tcp_sock *tp = tcp_sk(sk); + key = tp->af_specific->md5_lookup(sk, child); + if (key != NULL) { + /* + * We're using one, so create a matching key on the + * newsk structure. If we fail to get memory then we + * end up not copying the key across. Shucks. 
+ */ + char *newkey = kmemdup(key->key, key->keylen, + GFP_ATOMIC); + if (newkey) { + if (!tcp_alloc_md5sig_pool()) + BUG(); + tp->af_specific->md5_add(child, child, newkey, + key->keylen); } } + } #endif - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); - return child; + inet_csk_reqsk_queue_add(sk, req, child); + return child; - listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { - inet_rsk(req)->acked = 1; - return NULL; - } +listen_overflow: + if (!sysctl_tcp_abort_on_overflow) { + inet_rsk(req)->acked = 1; + return NULL; + } - embryonic_reset: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) - req->rsk_ops->send_reset(sk, skb); +embryonic_reset: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + if (!(flg & TCP_FLAG_RST)) + req->rsk_ops->send_reset(sk, skb); - inet_csk_reqsk_queue_drop(sk, req, prev); - return NULL; + inet_csk_reqsk_queue_drop(sk, req, prev); + return NULL; } /* diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a00532de2a8c..8165f5aa8c71 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -468,7 +468,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; @@ -509,7 +510,8 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - size += TCPOLEN_WSCALE_ALIGNED; + if(likely(opts->ws)) + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 383d17359d01..57e26fa66185 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,6 +951,27 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int is_udplite = IS_UDPLITE(sk); + int rc; + + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + /* returns: * -1: error * 0: success @@ -1042,17 +1063,16 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; - } + rc = 0; - return 0; + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + rc = __udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + return rc; drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); @@ -1090,15 +1110,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - + int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) /* we should probably 
re-process instead * of dropping packets here. */ @@ -1193,13 +1205,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); + int ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1492,7 +1498,7 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, + .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index 9c798abce736..63418185f524 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -47,8 +47,10 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - skb_set_network_header(skb, IPV4_BEET_PHMAXLEN - x->props.header_len - - hdrlen); + skb_set_network_header(skb, -x->props.header_len - + hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); skb->transport_header = skb->network_header + sizeof(*top_iph); |
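inet_twsk_purge(), added in inet_timewait_sock.c above, is a drop-lock-and-restart scan: it pins the timewait socket with a refcount, releases the bucket lock before the heavyweight inet_twsk_deschedule(), then rescans the chain from the top because the list may have mutated meanwhile. The skeleton of that idiom, as a hedged userspace sketch (names invented):

	#include <pthread.h>
	#include <stdbool.h>

	struct node {
		struct node *next;
		bool matches;		/* e.g. "same netns and family" */
	};

	struct bucket {
		pthread_mutex_t lock;
		struct node *head;
	};

	/* destroy() unlinks and frees the node and may block, so it must
	 * run without the bucket lock; every removal restarts the scan. */
	static void purge(struct bucket *b, void (*destroy)(struct node *))
	{
	restart:
		pthread_mutex_lock(&b->lock);
		for (struct node *n = b->head; n != NULL; n = n->next) {
			if (!n->matches)
				continue;
			pthread_mutex_unlock(&b->lock);
			destroy(n);	/* cf. inet_twsk_deschedule() + _put() */
			goto restart;
		}
		pthread_mutex_unlock(&b->lock);
	}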
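Most of the ipvs churn is one mechanical idiom swap: each scheduler's n_list gains a static LIST_HEAD_INIT initializer, the runtime INIT_LIST_HEAD() in every module init goes away, and ip_vs_sched.c tests registration with list_empty() rather than comparing n_list.next against the node itself. The idiom in isolation — a simplified intrusive list along the lines of the kernel's list.h, reduced and renamed here:

	#include <stdbool.h>
	#include <stdio.h>

	struct list_head {
		struct list_head *next, *prev;
	};

	#define LIST_HEAD_INIT(name) { &(name), &(name) }

	static bool list_empty(const struct list_head *head)
	{
		return head->next == head;	/* self-linked == not on any list */
	}

	static void list_add(struct list_head *new, struct list_head *head)
	{
		new->next = head->next;
		new->prev = head;
		head->next->prev = new;
		head->next = new;
	}

	struct toy_scheduler {
		const char *name;
		struct list_head n_list;	/* empty <=> not registered */
	};

	/* Static init makes list_empty() a valid "registered?" test from
	 * the very first use, with no init-order dependency. */
	static struct toy_scheduler rr = {
		.name   = "rr",
		.n_list = LIST_HEAD_INIT(rr.n_list),
	};

	static struct list_head sched_list = LIST_HEAD_INIT(sched_list);

	int main(void)
	{
		printf("registered before: %d\n", !list_empty(&rr.n_list));
		list_add(&rr.n_list, &sched_list);
		printf("registered after:  %d\n", !list_empty(&rr.n_list));
		return 0;
	}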
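The nf_nat_proto_common.c hunk changes only where the port search starts: with IP_NAT_RANGE_PROTO_RANDOM set, the initial offset now comes from a hash keyed on the flow's addresses and ports (secure_ipv4_port_ephemeral()) rather than net_random(), and otherwise from the shared rover as before; the probe loop over min + off % range_size is unchanged. That loop, lifted into a standalone function with a placeholder predicate:

	#include <stdbool.h>
	#include <stdint.h>

	/* Walk at most range_size candidates starting at `off`, wrapping
	 * within [min, min + range_size); returns the first free port,
	 * or 0 if the whole range is exhausted. */
	static uint16_t pick_port(uint16_t min, uint32_t range_size,
				  uint32_t off, bool (*in_use)(uint16_t port))
	{
		for (uint32_t i = 0; i < range_size; i++, off++) {
			uint16_t port = (uint16_t)(min + off % range_size);
			if (!in_use(port))
				return port;
		}
		return 0;
	}

Keying the starting offset on the flow makes NAT'd source ports hard for an off-path attacker to predict, while non-random callers keep the cheap sequential rover.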
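The udp.c rework moves the socket-ownership decision into udp_queue_rcv_skb() itself: the raw enqueue is split out as __udp_queue_rcv_skb(), which also becomes the proto's backlog_rcv handler, and each receive path now either delivers immediately or parks the skb on the backlog depending on sock_owned_by_user(). The control flow, as a compilable toy with the locking and names simplified:

	#include <pthread.h>
	#include <stddef.h>

	struct pkt {
		struct pkt *next;
	};

	struct toy_sock {
		pthread_mutex_t lock;
		int owned_by_user;	/* nonzero while a syscall holds the socket */
		struct pkt *backlog;	/* replayed via backlog_rcv on release */
	};

	/* Stand-in for __udp_queue_rcv_skb(): the unconditional enqueue. */
	static int deliver(struct toy_sock *sk, struct pkt *p)
	{
		(void)sk;
		(void)p;
		return 0;
	}

	/* Stand-in for udp_queue_rcv_skb() after the patch. */
	static int queue_rcv(struct toy_sock *sk, struct pkt *p)
	{
		int rc = 0;

		pthread_mutex_lock(&sk->lock);
		if (!sk->owned_by_user)
			rc = deliver(sk, p);	/* fast path */
		else {
			p->next = sk->backlog;	/* park; owner drains later */
			sk->backlog = p;
		}
		pthread_mutex_unlock(&sk->lock);
		return rc;
	}

Centralizing the check is what lets __udp4_lib_rcv() and the multicast deliver path drop their duplicated bh_lock_sock_nested() blocks in the hunks above.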