diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 21 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 25 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 7 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 34 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 65 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 3 | ||||
-rw-r--r-- | net/ipv4/ping.c | 2 | ||||
-rw-r--r-- | net/ipv4/proc.c | 9 | ||||
-rw-r--r-- | net/ipv4/raw.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 16 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 19 | ||||
-rw-r--r-- | net/ipv4/udp.c | 5 |
22 files changed, 183 insertions, 91 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 8d48c392adcc..a1b5bcbd04ae 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -73,6 +73,8 @@ static struct ipv4_devconf ipv4_devconf = { [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, + [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, + [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, }, }; @@ -83,6 +85,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = { [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1, [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, + [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, + [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, }, }; @@ -772,7 +776,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -780,6 +784,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -1124,10 +1130,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) if (len < (int) sizeof(ifr)) break; memset(&ifr, 0, sizeof(struct ifreq)); - if (ifa->ifa_label) - strcpy(ifr.ifr_name, ifa->ifa_label); - else - strcpy(ifr.ifr_name, dev->name); + strcpy(ifr.ifr_name, ifa->ifa_label); (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET; (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = @@ -2095,11 +2098,15 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), + DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, + "force_igmp_version"), + DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL, + "igmpv2_unsolicited_report_interval"), + DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, + "igmpv3_unsolicited_report_interval"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), - DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION, - "force_igmp_version"), DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index ab3d814bc80a..109ee89f123e 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -477,7 +477,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) } return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - - net_adj) & ~(align - 1)) + (net_adj - 2); + net_adj) & ~(align - 1)) + net_adj - 2; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 26aa65d1fce4..523be38e37de 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -101,6 +101,30 @@ errout: return err; } +static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +{ + struct fib_result *result = (struct fib_result *) arg->result; + struct net_device *dev = result->fi->fib_dev; + + /* do not accept result if the route does + * not meet the required prefix length + */ + if (result->prefixlen <= rule->suppress_prefixlen) + goto suppress_route; + + /* do not accept result if the route uses a device + * belonging to a forbidden interface group + */ + if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup) + goto suppress_route; + + return false; + +suppress_route: + if (!(arg->flags & FIB_LOOKUP_NOREF)) + fib_info_put(result->fi); + return true; +} static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { @@ -267,6 +291,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), .action = fib4_rule_action, + .suppress = fib4_rule_suppress, .match = fib4_rule_match, .configure = fib4_rule_configure, .delete = fib4_rule_delete, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed9340..3df6d3edb2a1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; @@ -2133,7 +2130,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 375aca372250..d6c0e64ec97f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,7 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> +#include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/arp.h> @@ -113,7 +114,8 @@ #define IGMP_V1_Router_Present_Timeout (400*HZ) #define IGMP_V2_Router_Present_Timeout (400*HZ) -#define IGMP_Unsolicited_Report_Interval (10*HZ) +#define IGMP_V2_Unsolicited_Report_Interval (10*HZ) +#define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) #define IGMP_Unsolicited_Report_Count 2 @@ -138,6 +140,29 @@ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) +static int unsolicited_report_interval(struct in_device *in_dev) +{ + int interval_ms, interval_jiffies; + + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) + interval_ms = IN_DEV_CONF_GET( + in_dev, + IGMPV2_UNSOLICITED_REPORT_INTERVAL); + else /* v3 */ + interval_ms = IN_DEV_CONF_GET( + in_dev, + IGMPV3_UNSOLICITED_REPORT_INTERVAL); + + interval_jiffies = msecs_to_jiffies(interval_ms); + + /* _timer functions can't handle a delay of 0 jiffies so ensure + * we always return a positive value. + */ + if (interval_jiffies <= 0) + interval_jiffies = 1; + return interval_jiffies; +} + static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); static void igmpv3_clear_delrec(struct in_device *in_dev); @@ -315,6 +340,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) if (size < 256) return NULL; } + skb->priority = TC_PRIO_CONTROL; igmp_skb_size(skb) = size; rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, @@ -670,6 +696,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, ip_rt_put(rt); return -1; } + skb->priority = TC_PRIO_CONTROL; skb_dst_set(skb, &rt->dst); @@ -719,7 +746,8 @@ static void igmp_ifc_timer_expire(unsigned long data) igmpv3_send_cr(in_dev); if (in_dev->mr_ifc_count) { in_dev->mr_ifc_count--; - igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); + igmp_ifc_start_timer(in_dev, + unsolicited_report_interval(in_dev)); } __in_dev_put(in_dev); } @@ -744,7 +772,7 @@ static void igmp_timer_expire(unsigned long data) if (im->unsolicit_count) { im->unsolicit_count--; - igmp_start_timer(im, IGMP_Unsolicited_Report_Interval); + igmp_start_timer(im, unsolicited_report_interval(in_dev)); } im->reporter = 1; spin_unlock(&im->lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1f6eab66f7ce..d7aea4c5b940 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -383,7 +383,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } @@ -534,7 +534,7 @@ static int __net_init ipgre_init_net(struct net *net) static void __net_exit ipgre_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipgre_link_ops); } static struct pernet_operations ipgre_net_ops = { @@ -767,7 +767,7 @@ static int __net_init ipgre_tap_init_net(struct net *net) static void __net_exit ipgre_tap_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipgre_tap_ops); } static struct pernet_operations ipgre_tap_net_ops = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 15e3e683adec..054a3e97d822 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -141,6 +141,7 @@ #include <net/icmp.h> #include <net/raw.h> #include <net/checksum.h> +#include <net/inet_ecn.h> #include <linux/netfilter_ipv4.h> #include <net/xfrm.h> #include <linux/mroute.h> @@ -410,6 +411,13 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; + BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); + BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); + BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); + IP_ADD_STATS_BH(dev_net(dev), + IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ca1cb2d5f6e2..a4d9126c7b51 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -350,7 +350,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(dev_net(dev), &fl4, + rt = ip_route_output_tunnel(tunnel->net, &fl4, tunnel->parms.iph.protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, @@ -365,7 +365,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len + tdev->needed_headroom; @@ -454,15 +454,16 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - if (tunnel->net != dev_net(tunnel->dev)) - skb_scrub_packet(skb); - if (tunnel->dev->type == ARPHRD_ETHER) { skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; } + + if (!net_eq(tunnel->net, dev_net(tunnel->dev))) + skb_scrub_packet(skb); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -613,7 +614,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tunnel->net != dev_net(dev)) + if (!net_eq(tunnel->net, dev_net(dev))) skb_scrub_packet(skb); if (tunnel->err_count > 0) { @@ -653,7 +654,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - err = iptunnel_xmit(dev_net(dev), rt, skb, + err = iptunnel_xmit(tunnel->net, rt, skb, fl4.saddr, fl4.daddr, protocol, ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -820,11 +821,10 @@ static void ip_tunnel_dev_free(struct net_device *dev) void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_net *itn; - itn = net_generic(net, tunnel->ip_tnl_net_id); + itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); if (itn->fb_tunnel_dev != dev) { ip_tunnel_del(netdev_priv(dev)); @@ -838,56 +838,70 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); struct ip_tunnel_parm parms; + unsigned int i; - itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); - if (!itn->tunnels) - return -ENOMEM; + for (i = 0; i < IP_TNL_HASH_SIZE; i++) + INIT_HLIST_HEAD(&itn->tunnels[i]); if (!ops) { itn->fb_tunnel_dev = NULL; return 0; } + memset(&parms, 0, sizeof(parms)); if (devname) strlcpy(parms.name, devname, IFNAMSIZ); rtnl_lock(); itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; rtnl_unlock(); - if (IS_ERR(itn->fb_tunnel_dev)) { - kfree(itn->tunnels); + + if (IS_ERR(itn->fb_tunnel_dev)) return PTR_ERR(itn->fb_tunnel_dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, + struct rtnl_link_ops *ops) { + struct net *net = dev_net(itn->fb_tunnel_dev); + struct net_device *dev, *aux; int h; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == ops) + unregister_netdevice_queue(dev, head); + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { struct ip_tunnel *t; struct hlist_node *n; struct hlist_head *thead = &itn->tunnels[h]; hlist_for_each_entry_safe(t, n, thead, hash_node) - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, head); } if (itn->fb_tunnel_dev) unregister_netdevice_queue(itn->fb_tunnel_dev, head); } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn) +void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) { LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list); + ip_tunnel_destroy(itn, &list, ops); unregister_netdevice_many(&list); rtnl_unlock(); - kfree(itn->tunnels); } EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); @@ -929,23 +943,21 @@ EXPORT_SYMBOL_GPL(ip_tunnel_newlink); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); + struct ip_tunnel *t; struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); if (dev == itn->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); - t = ip_tunnel_find(itn, p, dev->type); if (t) { if (t->dev != dev) return -EEXIST; } else { - t = nt; + t = tunnel; if (dev->type != ARPHRD_ETHER) { unsigned int nflags = 0; @@ -984,6 +996,7 @@ int ip_tunnel_init(struct net_device *dev) } tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->ihl = 5; @@ -994,8 +1007,8 @@ EXPORT_SYMBOL_GPL(ip_tunnel_init); void ip_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 7167b08977df..850525b34899 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -76,9 +76,7 @@ int iptunnel_xmit(struct net *net, struct rtable *rt, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - tunnel_ip_select_ident(skb, - (const struct iphdr *)skb_inner_network_header(skb), - &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 79b263da4168..e805e7b3030e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -318,7 +318,7 @@ static int __net_init vti_init_net(struct net *net) static void __net_exit vti_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &vti_link_ops); } static struct pernet_operations vti_net_ops = { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 51fc2a1dcdd3..87bd2952c733 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -286,7 +286,6 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -437,7 +436,7 @@ static int __net_init ipip_init_net(struct net *net) static void __net_exit ipip_exit_net(struct net *net) { struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); - ip_tunnel_delete_net(itn); + ip_tunnel_delete_net(itn, &ipip_link_ops); } static struct pernet_operations ipip_net_ops = { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 746427c9e719..d7d9882d4cae 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1082,7 +1082,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6577a1149a47..4a0335854b89 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -111,7 +111,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_SENTINEL }; -/* Following RFC4293 items are displayed in /proc/net/netstat */ +/* Following items are displayed in /proc/net/netstat */ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES), SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), @@ -125,7 +125,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), + /* Non RFC4293 fields */ SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS), + SNMP_MIB_ITEM("InNoECTPkts", IPSTATS_MIB_NOECTPKTS), + SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), + SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), + SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), SNMP_MIB_SENTINEL }; @@ -273,7 +278,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), - SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), + SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600c..41d84505a922 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -987,7 +987,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a9a54a236832..e805481eff72 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -435,12 +435,12 @@ static inline int ip_rt_proc_init(void) static inline bool rt_is_expired(const struct rtable *rth) { - return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); + return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); } void rt_cache_flush(struct net *net) { - rt_genid_bump(net); + rt_genid_bump_ipv4(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -1458,7 +1458,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->dst.output = ip_rt_bug; - rth->rt_genid = rt_genid(dev_net(dev)); + rth->rt_genid = rt_genid_ipv4(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; rth->rt_is_input= 1; @@ -1589,7 +1589,7 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); + rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; rth->rt_is_input = 1; @@ -1760,7 +1760,7 @@ local_input: rth->dst.tclassid = itag; #endif - rth->rt_genid = rt_genid(net); + rth->rt_genid = rt_genid_ipv4(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; rth->rt_is_input = 1; @@ -1945,7 +1945,7 @@ add: rth->dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_genid = rt_genid_ipv4(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; rth->rt_is_input = 0; @@ -2227,7 +2227,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; - rt->rt_genid = rt_genid(net); + rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; @@ -2665,7 +2665,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->rt_genid, 0); + atomic_set(&net->ipv4.rt_genid, 0); atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69ed203802da..8ed7c32ae28e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -332,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c27e81392398..ab64eea042fa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -410,10 +410,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* Presumed zeroed, in order of appearance: - * cookie_in_always, cookie_out_never, - * s_data_constant, s_data_in, s_data_out - */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb2..b6ae92a51f58 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -414,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8f7ef0ad80e5..ab7bd35bb312 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -58,23 +58,22 @@ error: kfree(ctx); return err; } -/* Computes the fastopen cookie for the peer. - * The peer address is a 128 bits long (pad with zeros for IPv4). +/* Computes the fastopen cookie for the IP path. + * The path is a 128 bits long (pad with zeros for IPv4). * * The caller must check foc->len to determine if a valid cookie * has been generated successfully. */ -void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc) +void tcp_fastopen_cookie_gen(__be32 src, __be32 dst, + struct tcp_fastopen_cookie *foc) { - __be32 peer_addr[4] = { addr, 0, 0, 0 }; + __be32 path[4] = { src, dst, 0, 0 }; struct tcp_fastopen_context *ctx; rcu_read_lock(); ctx = rcu_dereference(tcp_fastopen_ctx); if (ctx) { - crypto_cipher_encrypt_one(ctx->tfm, - foc->val, - (__u8 *)peer_addr); + crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; } rcu_read_unlock(); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b61274b666f6..e965cc7b87ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1877,8 +1877,13 @@ void tcp_enter_loss(struct sock *sk, int how) } tcp_verify_left_out(tp); - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + /* Timeout in disordered state after receiving substantial DUPACKs + * suggests that the degree of reordering is over-estimated. + */ + if (icsk->icsk_ca_state <= TCP_CA_Disorder && + tp->sacked_out >= sysctl_tcp_reordering) + tp->reordering = min_t(unsigned int, tp->reordering, + sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2a5d5c469d17..05a3d45d3102 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -890,7 +890,7 @@ bool tcp_syn_flood_action(struct sock *sk, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned) { + if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { lopt->synflood_warned = 1; pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); @@ -1316,9 +1316,11 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; return true; } + if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) { if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) { - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) || memcmp(&foc->val[0], &valid_foc->val[0], TCP_FASTOPEN_COOKIE_SIZE) != 0) @@ -1329,14 +1331,16 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq; return true; } else if (foc->len == 0) { /* Client requesting a cookie */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); } else { /* Client sent a cookie with wrong size. Treat it * the same as invalid and return a valid one. */ - tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc); + tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, valid_foc); } return false; } @@ -1462,7 +1466,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ - if (inet_csk_reqsk_queue_is_full(sk) && !isn) { + if ((sysctl_tcp_syncookies == 2 || + inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); if (!want_cookie) goto drop; @@ -2603,7 +2608,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n", i, ireq->loc_addr, ntohs(inet_sk(sk)->inet_sport), @@ -2661,7 +2666,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", + "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, rx_queue, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 766e6bab9113..0b24508bcdc4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -704,7 +704,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames); * @src: source IP address * @dst: destination IP address */ -static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) +void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { struct udphdr *uh = udp_hdr(skb); struct sk_buff *frags = skb_shinfo(skb)->frag_list; @@ -740,6 +740,7 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) uh->check = CSUM_MANGLED_0; } } +EXPORT_SYMBOL_GPL(udp4_hwcsum); static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) { @@ -2158,7 +2159,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), |