diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 12 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 50 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 9 | ||||
-rw-r--r-- | net/ipv4/ipmr_base.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 7 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 51 | ||||
-rw-r--r-- | net/ipv4/xfrm4_input.c | 1 | ||||
-rw-r--r-- | net/ipv4/xfrm4_mode_transport.c | 4 |
14 files changed, 116 insertions, 43 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 20fda8fb8ffd..1fbe2f815474 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1377,6 +1377,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (encap) skb_reset_inner_headers(skb); skb->network_header = (u8 *)iph - skb->head; + skb_reset_mac_len(skb); } while ((skb = skb->next)); out: diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2998b0e47d4b..0113993e9b2c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f3c89ccf14c5..446204ca7406 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfd5009f96ef..15e7f7915a21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -544,7 +544,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -558,11 +559,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c0fe5ad996f2..26c36cccabdc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c4f5602308ed..284a22154b4e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); + unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; + /* ensure we can access the inner net header, for several users below */ + if (skb->protocol == htons(ETH_P_IP)) + inner_nhdr_len = sizeof(struct iphdr); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_nhdr_len = sizeof(struct ipv6hdr); + if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) + goto tx_error; + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 1ad9aa62a97b..eab8cd5ec2f5 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -296,8 +296,6 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, next_entry: e++; } - e = 0; - s_e = 0; spin_lock_bh(lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b678466da451..8501554e96a4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1001,21 +1001,22 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; if (ip_mtu_locked(dst)) return; - if (ipv4_mtu(dst) < mtu) + if (old_mtu < mtu) return; if (mtu < ip_rt_min_pmtu) { lock = true; - mtu = ip_rt_min_pmtu; + mtu = min(old_mtu, ip_rt_min_pmtu); } - if (rt->rt_pmtu == mtu && + if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) return; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b92f422f2fa8..891ed2f91467 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,7 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; +static u32 u32_max_div_HZ = UINT_MAX / HZ; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -745,9 +746,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_probe_interval", .data = &init_net.ipv4.sysctl_tcp_probe_interval, - .maxlen = sizeof(int), + .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec_minmax, + .extra2 = &u32_max_div_HZ, }, { .procname = "igmp_link_local_mcast_reports", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cf2f7bb2802..47e08c1b5bc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6009,11 +6009,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c09eddbb78..cd426313a298 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,9 +943,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f4e35b2ff8b8..c32a4c16b7ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1627,7 +1627,7 @@ busy_check: *err = error; return NULL; } -EXPORT_SYMBOL_GPL(__skb_recv_udp); +EXPORT_SYMBOL(__skb_recv_udp); /* * This should be easy, if there is something there we @@ -2124,6 +2124,28 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, inet_compute_pseudo); } +/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and + * return code conversion for ip layer consumption + */ +static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, + struct udphdr *uh) +{ + int ret; + + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + + ret = udp_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; +} + /* * All we need to do is get the socket, and then do a checksum. */ @@ -2170,14 +2192,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (unlikely(sk->sk_rx_dst != dst)) udp_sk_rx_dst_set(sk, dst); - ret = udp_queue_rcv_skb(sk, skb); + ret = udp_unicast_rcv_skb(sk, skb, uh); sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; + return ret; } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) @@ -2185,22 +2202,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk) { - int ret; - - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) - skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, - inet_compute_pseudo); - - ret = udp_queue_rcv_skb(sk, skb); - - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; - } + if (sk) + return udp_unicast_rcv_skb(sk, skb, uh); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } |