diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 4 | ||||
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 7 | ||||
-rw-r--r-- | net/ipv4/esp4.c | 16 | ||||
-rw-r--r-- | net/ipv4/esp4_offload.c | 32 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 2 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 22 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 105 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 38 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 4 | ||||
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_scalable.c | 17 | ||||
-rw-r--r-- | net/ipv4/tcp_veno.c | 47 | ||||
-rw-r--r-- | net/ipv4/tcp_yeah.c | 41 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 |
25 files changed, 257 insertions, 143 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f96bd489b362..6490b845e17b 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,6 +303,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" + depends on IPV6 || IPV6=n select INET_TUNNEL select NET_IP_TUNNEL select XFRM diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index bd7b4e92e07f..cf58e29cf746 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1793,6 +1793,10 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); free_percpu(net->mib.tcp_statistics); +#ifdef CONFIG_MPTCP + /* allocated on demand, see mptcp_init_sock() */ + free_percpu(net->mib.mptcp_statistics); +#endif } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 0fd8bfde2448..e3939f76b024 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -217,7 +217,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, { const struct tcp_congestion_ops *utcp_ca; struct tcp_congestion_ops *tcp_ca; - size_t tcp_ca_name_len; int prog_fd; u32 moff; @@ -232,13 +231,11 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, tcp_ca->flags = utcp_ca->flags; return 1; case offsetof(struct tcp_congestion_ops, name): - tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name)); - if (!tcp_ca_name_len || - tcp_ca_name_len == sizeof(utcp_ca->name)) + if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, + sizeof(tcp_ca->name)) <= 0) return -EINVAL; if (tcp_ca_find(utcp_ca->name)) return -EEXIST; - memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)); return 1; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 103c7d599a3c..8b07f3a4f2db 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -341,22 +341,6 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } -static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) -{ - /* Fill padding... */ - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = proto; -} - static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, int encap_type, struct esp_info *esp, diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e2e219c7854a..731022cff600 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -132,6 +132,36 @@ static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, return segs; } +static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct sk_buff *segs = ERR_PTR(-EINVAL); + const struct net_offload *ops; + int proto = xo->proto; + + skb->transport_header += x->props.header_len; + + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; + + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else if (x->sel.family != AF_INET6) { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } else if (proto == IPPROTO_TCP) { + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + } + + __skb_pull(skb, skb_transport_offset(skb)); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) @@ -141,6 +171,8 @@ static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, return xfrm4_tunnel_gso_segment(x, skb, features); case XFRM_MODE_TRANSPORT: return xfrm4_transport_gso_segment(x, skb, features); + case XFRM_MODE_BEET: + return xfrm4_beet_gso_segment(x, skb, features); } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 577db1d50a24..213be9c050ad 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } + rcu_read_lock(); err = fib_table_dump(tb, skb, cb, &filter); + rcu_read_unlock(); return skb->len ? : err; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c092e9a55790..818916b2a04d 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -35,7 +35,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) void fib_release_info(struct fib_info *); struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e4c62b8f57a8..6ed8c9317179 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -570,8 +570,9 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } -int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, - u16 encap_type, void *cfg, gfp_t gfp_flags, +int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, + struct nlattr *encap, u16 encap_type, + void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { int err; @@ -589,8 +590,9 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, err = -EINVAL; goto lwt_failure; } - err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, - cfg, &lwtstate, extack); + err = lwtunnel_build_state(net, encap_type, encap, + nhc->nhc_family, cfg, &lwtstate, + extack); if (err) goto lwt_failure; @@ -614,7 +616,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_family = AF_INET; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) return err; @@ -814,7 +816,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int fib_encap_match(u16 encap_type, +static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, const struct fib_config *cfg, @@ -826,7 +828,7 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, AF_INET, + ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); @@ -836,7 +838,7 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -857,8 +859,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, - nh, cfg, extack)) + if (fib_encap_match(net, cfg->fc_encap_type, + cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index f4c2ac445b3f..b01b748df9dd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1679,7 +1679,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi, extack) == 0 && + fib_nh_match(net, cfg, fi, extack) == 0 && fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8274f98c511c..029b24eeafba 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1153,6 +1153,24 @@ static int ipgre_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + return 0; +} + +static int erspan_netlink_parms(struct net_device *dev, + struct nlattr *data[], + struct nlattr *tb[], + struct ip_tunnel_parm *parms, + __u32 *fwmark) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); + if (err) + return err; + if (!data) + return 0; + if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); @@ -1276,45 +1294,70 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int +ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) { - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; - __u32 fwmark = 0; - int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); - err = ip_tunnel_encap_setup(t, &ipencap); + int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } + return 0; +} + +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; return ip_tunnel_newlink(dev, tb, &p, fwmark); } +static int erspan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err) + return err; + return ip_tunnel_newlink(dev, tb, &p, fwmark); +} + static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; struct ip_tunnel_parm p; int err; - if (ipgre_netlink_encap_parms(data, &ipencap)) { - err = ip_tunnel_encap_setup(t, &ipencap); - - if (err < 0) - return err; - } + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) @@ -1327,8 +1370,34 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], t->parms.i_flags = p.i_flags; t->parms.o_flags = p.o_flags; - if (strcmp(dev->rtnl_link_ops->kind, "erspan")) - ipgre_link_update(dev, !tb[IFLA_MTU]); + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; +} + +static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel *t = netdev_priv(dev); + __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err < 0) + return err; + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; return 0; } @@ -1519,8 +1588,8 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .priv_size = sizeof(struct ip_tunnel), .setup = erspan_setup, .validate = erspan_validate, - .newlink = ipgre_newlink, - .changelink = ipgre_changelink, + .newlink = erspan_newlink, + .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index aaaaf907e0d8..090d3097ee15 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * insufficent MTU. */ features = netif_skb_features(skb); - BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 47f8b947eef1..181b7a2a0247 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -432,7 +432,7 @@ static int ip_tun_set_opts(struct nlattr *attr, struct ip_tunnel_info *info, return ip_tun_parse_opts(attr, info, extack); } -static int ip_tun_build_state(struct nlattr *attr, +static int ip_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) @@ -719,7 +719,7 @@ static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = { [LWTUNNEL_IP6_OPTS] = { .type = NLA_NESTED }, }; -static int ip6_tun_build_state(struct nlattr *attr, +static int ip6_tun_build_state(struct net *net, struct nlattr *attr, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 37cddd18f282..1b4e6f298648 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -187,17 +187,39 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - struct rtable *rt; - - fl->u.ip4.flowi4_oif = dev->ifindex; - fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; - rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); - if (IS_ERR(rt)) { + switch (skb->protocol) { + case htons(ETH_P_IP): { + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + skb_dst_set(skb, dst); + break; +#endif + default: dev->stats.tx_carrier_errors++; goto tx_error_icmp; } - dst = &rt->dst; - skb_dst_set(skb, dst); } dst_hold(dst); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f1f78a742b36..b167f4a5b684 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1057,7 +1057,7 @@ struct compat_arpt_replace { u32 underflow[NF_ARP_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; - struct compat_arpt_entry entries[0]; + struct compat_arpt_entry entries[]; }; static inline void compat_release_entry(struct compat_arpt_entry *e) @@ -1383,7 +1383,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, struct compat_arpt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_arpt_entry entrytable[0]; + struct compat_arpt_entry entrytable[]; }; static int compat_get_entries(struct net *net, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 10b91ebdf213..c2670eaa74e6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1211,7 +1211,7 @@ struct compat_ipt_replace { u32 underflow[NF_INET_NUMHOOKS]; u32 num_counters; compat_uptr_t counters; /* struct xt_counters * */ - struct compat_ipt_entry entries[0]; + struct compat_ipt_entry entries[]; }; static int @@ -1562,7 +1562,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; - struct compat_ipt_entry entrytable[0]; + struct compat_ipt_entry entrytable[]; }; static int diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2580303249e2..75545a829a2b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -32,6 +32,7 @@ #include <net/icmp.h> #include <net/protocol.h> #include <net/tcp.h> +#include <net/mptcp.h> #include <net/udp.h> #include <net/udplite.h> #include <linux/bottom_half.h> @@ -485,6 +486,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v) offsetof(struct ipstats_mib, syncp))); seq_putc(seq, '\n'); + mptcp_seq_show(seq); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 042599cc691d..788c69d9bfe0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1621,12 +1621,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, - bool nopolicy, bool noxfrm, bool will_cache) + bool nopolicy, bool noxfrm) { struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1674,7 +1673,6 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) new_rt->rt_gw6 = rt->rt_gw6; INIT_LIST_HEAD(&new_rt->rt_uncached); - new_rt->dst.flags |= DST_HOST; new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; new_rt->dst.error = rt->dst.error; @@ -1734,7 +1732,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) return -ENOBUFS; @@ -1851,7 +1849,7 @@ static int __mkroute_input(struct sk_buff *skb, rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -2219,7 +2217,7 @@ local_input: rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, flags | RTCF_LOCAL, res->type, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; @@ -2443,8 +2441,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, add: rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM), - do_cache); + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c57850fab4b..6d87de434377 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) + else if (tp->repair_queue == TCP_RECV_QUEUE) { WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } else err = -EINVAL; break; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 645cc3009e64..f5f588b1f6e9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -145,12 +145,13 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt, 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } /* diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 03af7c3e75ef..7e40322cc5ec 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -774,6 +774,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) { + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); + return child; + } + sock_rps_save_rxhash(child, skb); tcp_synack_rtt_meas(child, req); *req_stolen = !own_req; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 306e25d743e8..2f45cde168c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb)) return -ENOBUFS; + /* retransmit skbs might have a non zero value in skb->dev + * because skb->dev is aliased with skb->rbnode.rb_left + */ + skb->dev = NULL; } inet = inet_sk(sk); @@ -3037,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) tcp_skb_tsorted_save(skb) { nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + if (nskb) { + nskb->dev = NULL; + err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC); + } else { + err = -ENOBUFS; + } } tcp_skb_tsorted_restore(skb); if (!err) { diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 471571e1ab26..6cebf412d590 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -10,10 +10,9 @@ #include <net/tcp.h> /* These factors derived from the recommended values in the aer: - * .01 and and 7/8. We use 50 instead of 100 to account for - * delayed ack. + * .01 and and 7/8. */ -#define TCP_SCALABLE_AI_CNT 50U +#define TCP_SCALABLE_AI_CNT 100U #define TCP_SCALABLE_MD_SCALE 3 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) @@ -23,11 +22,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); - else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), - 1); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 3b36bb1a0dda..50a9a6e2c4cd 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -153,31 +153,34 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; if (tcp_in_slow_start(tp)) { - /* Slow start. */ - tcp_slow_start(tp, acked); + /* Slow start. */ + acked = tcp_slow_start(tp, acked); + if (!acked) + goto done; + } + + /* Congestion avoidance. */ + if (veno->diff < beta) { + /* In the "non-congestive state", increase cwnd + * every rtt. + */ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } else { - /* Congestion avoidance. */ - if (veno->diff < beta) { - /* In the "non-congestive state", increase cwnd - * every rtt. - */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); - } else { - /* In the "congestive state", increase cwnd - * every other rtt. - */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc && - tp->snd_cwnd < tp->snd_cwnd_clamp) { - tp->snd_cwnd++; - veno->inc = 0; - } else - veno->inc = 1; - tp->snd_cwnd_cnt = 0; + /* In the "congestive state", increase cwnd + * every other rtt. + */ + if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { + tp->snd_cwnd++; + veno->inc = 0; } else - tp->snd_cwnd_cnt++; - } + veno->inc = 1; + tp->snd_cwnd_cnt = 0; + } else + tp->snd_cwnd_cnt += acked; } +done: if (tp->snd_cwnd < 2) tp->snd_cwnd = 2; else if (tp->snd_cwnd > tp->snd_cwnd_clamp) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e00570dd0a69..3bb448761ca3 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -36,8 +36,6 @@ struct yeah { u32 reno_count; u32 fast_count; - - u32 pkts_acked; }; static void tcp_yeah_init(struct sock *sk) @@ -57,18 +55,6 @@ static void tcp_yeah_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void tcp_yeah_pkts_acked(struct sock *sk, - const struct ack_sample *sample) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct yeah *yeah = inet_csk_ca(sk); - - if (icsk->icsk_ca_state == TCP_CA_Open) - yeah->pkts_acked = sample->pkts_acked; - - tcp_vegas_pkts_acked(sk, sample); -} - static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -77,24 +63,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tcp_in_slow_start(tp)) - tcp_slow_start(tp, acked); + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + goto do_vegas; + } - else if (!yeah->doing_reno_now) { + if (!yeah->doing_reno_now) { /* Scalable */ - - tp->snd_cwnd_cnt += yeah->pkts_acked; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - - yeah->pkts_acked = 1; - + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + acked); } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. @@ -118,7 +99,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) * of bytes we send in an RTT is often less than our cwnd will allow. * So we keep track of our cwnd separately, in v_beg_snd_cwnd. */ - +do_vegas: if (after(ack, yeah->vegas.beg_snd_nxt)) { /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -232,7 +213,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, - .pkts_acked = tcp_yeah_pkts_acked, + .pkts_acked = tcp_vegas_pkts_acked, .owner = THIS_MODULE, .name = "yeah", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4035021bbd3..32564b350823 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2109,7 +2109,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (likely(!udp_unexpected_gso(sk, skb))) return udp_queue_rcv_one_skb(sk, skb); - BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { |