diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 330 |
1 files changed, 202 insertions, 128 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 723ac9181558..fccb05fb3a79 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = { EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); +#ifndef CONFIG_PREEMPT_RT #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) +#else +#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field) +#endif #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) @@ -390,7 +394,13 @@ static inline int ip_rt_proc_init(void) static inline bool rt_is_expired(const struct rtable *rth) { - return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); + bool res; + + rcu_read_lock(); + res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev)); + rcu_read_unlock(); + + return res; } void rt_cache_flush(struct net *net) @@ -870,11 +880,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) } log_martians = IN_DEV_LOG_MARTIANS(in_dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev); - rcu_read_unlock(); net = dev_net(rt->dst.dev); - peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif); if (!peer) { + rcu_read_unlock(); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); return; @@ -893,7 +903,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; - goto out_put_peer; + goto out_unlock; } /* Check for load limit; set rate_last to the latest sent @@ -914,8 +924,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); } -out_put_peer: - inet_putpeer(peer); +out_unlock: + rcu_read_unlock(); } static int ip_error(struct sk_buff *skb) @@ -975,9 +985,9 @@ static int ip_error(struct sk_buff *skb) break; } + rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - l3mdev_master_ifindex(skb->dev), 1); - + l3mdev_master_ifindex_rcu(skb->dev)); send = true; if (peer) { now = jiffies; @@ -989,8 +999,9 @@ static int ip_error(struct sk_buff *skb) peer->rate_tokens -= ip_rt_error_cost; else send = false; - inet_putpeer(peer); } + rcu_read_unlock(); + if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); @@ -1001,9 +1012,9 @@ out: kfree_skb_reason(skb, reason); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; - struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; + struct net *net; u32 old_mtu; if (ip_mtu_locked(dst)) @@ -1013,6 +1024,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (old_mtu < mtu) return; + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1020,17 +1033,29 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) - return; + goto out; - rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, NULL); +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (fib_info_num_path(res.fi) > 1) { + int nhsel; + + for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) { + nhc = fib_info_nhc(res.fi, nhsel); + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, + jiffies + net->ipv4.ip_rt_mtu_expires); + } + goto out; + } +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } +out: rcu_read_unlock(); } @@ -1263,7 +1288,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = iph->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, @@ -1293,10 +1318,15 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - net->ipv4.ip_rt_min_advmss); + unsigned int advmss; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); + advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, + net->ipv4.ip_rt_min_advmss); + rcu_read_unlock(); return min(advmss, IPV4_MAX_PMTU - header_size); } @@ -1665,49 +1695,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ -int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - struct in_device *in_dev, u32 *itag) +enum skb_drop_reason +ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct in_device *in_dev, u32 *itag) { - int err; + enum skb_drop_reason reason; /* Primary sanity checks. */ if (!in_dev) - return -EINVAL; + return SKB_DROP_REASON_NOT_SPECIFIED; - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || - skb->protocol != htons(ETH_P_IP)) - return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + return SKB_DROP_REASON_IP_INVALID_SOURCE; + + if (skb->protocol != htons(ETH_P_IP)) + return SKB_DROP_REASON_INVALID_PROTO; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) - return -EINVAL; + return SKB_DROP_REASON_IP_LOCALNET; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr) && ip_hdr(skb)->protocol != IPPROTO_IGMP) - return -EINVAL; + return SKB_DROP_REASON_IP_INVALID_SOURCE; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, itag); - if (err < 0) - return err; + reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, + dev, in_dev, itag); + if (reason) + return reason; } - return 0; + return SKB_NOT_DROPPED_YET; } /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) +static enum skb_drop_reason +ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, int our) { struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; + enum skb_drop_reason reason; struct rtable *rth; u32 itag = 0; - int err; - err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); - if (err) - return err; + reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, + &itag); + if (reason) + return reason; if (our) flags |= RTCF_LOCAL; @@ -1718,7 +1753,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, false); if (!rth) - return -ENOBUFS; + return SKB_DROP_REASON_NOMEM; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; @@ -1734,7 +1769,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); - return 0; + return SKB_NOT_DROPPED_YET; } @@ -1764,11 +1799,12 @@ static void ip_handle_martian_source(struct net_device *dev, } /* called in rcu_read_lock() section */ -static int __mkroute_input(struct sk_buff *skb, - const struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) +static enum skb_drop_reason +__mkroute_input(struct sk_buff *skb, const struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; @@ -1782,12 +1818,13 @@ static int __mkroute_input(struct sk_buff *skb, out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); - return -EINVAL; + return reason; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), + err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { + reason = -err; ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -1815,7 +1852,7 @@ static int __mkroute_input(struct sk_buff *skb, */ if (out_dev == in_dev && IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { - err = -EINVAL; + reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE; goto cleanup; } } @@ -1838,7 +1875,7 @@ static int __mkroute_input(struct sk_buff *skb, rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { - err = -ENOBUFS; + reason = SKB_DROP_REASON_NOMEM; goto cleanup; } @@ -1852,9 +1889,9 @@ static int __mkroute_input(struct sk_buff *skb, lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: - err = 0; - cleanup: - return err; + reason = SKB_NOT_DROPPED_YET; +cleanup: + return reason; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -2004,8 +2041,12 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, hash_keys.addrs.v4addrs.dst = fl4->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl4->flowi4_proto; - if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) - hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; + } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; @@ -2060,7 +2101,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; - hash_keys.ports.src = fl4->fl4_sport; + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } @@ -2112,66 +2156,72 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int ip_mkroute_input(struct sk_buff *skb, - struct fib_result *res, - struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct flow_keys *hkeys) +static enum skb_drop_reason +ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, + struct in_device *in_dev, __be32 daddr, + __be32 saddr, dscp_t dscp, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); - fib_select_multipath(res, h); + fib_select_multipath(res, h, NULL); IPCB(skb)->flags |= IPSKB_MULTIPATH; } #endif /* create a routing cache entry */ - return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); + return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp); } /* Implements all the saddr-related checks as ip_route_input_slow(), * assuming daddr is valid and the destination is not a local broadcast one. * Uses the provided hint instead of performing a route lookup. */ -int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - const struct sk_buff *hint) +enum skb_drop_reason +ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + const struct sk_buff *hint) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); - int err = -EINVAL; u32 tag = 0; if (!in_dev) - return -EINVAL; + return reason; - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_zeronet(saddr)) + if (ipv4_is_zeronet(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; + } if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - tos &= INET_DSCP_MASK; - err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, + in_dev, &tag); + if (reason) goto martian_source; skip_validate_source: skb_dst_copy(skb, hint); - return 0; + return SKB_NOT_DROPPED_YET; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - return err; + return reason; } /* get device for dst_alloc with local routes */ @@ -2200,10 +2250,12 @@ static struct net_device *ip_rt_get_dev(struct net *net, * called with rcu_read_lock() */ -static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, - struct fib_result *res) +static enum skb_drop_reason +ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct fib_result *res) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct flow_keys *flkeys = NULL, _flkeys; struct net *net = dev_net(dev); @@ -2231,8 +2283,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_tun_key.tun_id = 0; skb_dst_drop(skb); - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } res->fi = NULL; res->table = NULL; @@ -2242,21 +2296,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* Accept zero addresses only to limited broadcast; * I even do not know to fix it or not. Waiting for complains :-) */ - if (ipv4_is_zeronet(saddr)) + if (ipv4_is_zeronet(saddr)) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; + } - if (ipv4_is_zeronet(daddr)) + if (ipv4_is_zeronet(daddr)) { + reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; + } /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), * and call it once if daddr or/and saddr are loopback addresses */ if (ipv4_is_loopback(daddr)) { - if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_destination; + } } else if (ipv4_is_loopback(saddr)) { - if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) + if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { + reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; + } } /* @@ -2266,7 +2328,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; - fl4.flowi4_tos = tos; + fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.daddr = daddr; @@ -2298,10 +2360,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; } + err = -EINVAL; if (res->type == RTN_LOCAL) { - err = fib_validate_source(skb, saddr, daddr, tos, - 0, dev, in_dev, &itag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, daddr, dscp, + 0, dev, in_dev, &itag); + if (reason) goto martian_source; goto local_input; } @@ -2310,21 +2373,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = -EHOSTUNREACH; goto no_route; } - if (res->type != RTN_UNICAST) + if (res->type != RTN_UNICAST) { + reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; + } make_route: - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); -out: return err; + reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, + flkeys); + +out: + return reason; brd_input: - if (skb->protocol != htons(ETH_P_IP)) - goto e_inval; + if (skb->protocol != htons(ETH_P_IP)) { + reason = SKB_DROP_REASON_INVALID_PROTO; + goto out; + } if (!ipv4_is_zeronet(saddr)) { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, &itag); - if (err < 0) + reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, + dev, in_dev, &itag); + if (reason) goto martian_source; } flags |= RTCF_BROADCAST; @@ -2342,7 +2412,7 @@ local_input: rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); - err = 0; + reason = SKB_NOT_DROPPED_YET; goto out; } } @@ -2379,7 +2449,7 @@ local_input: rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); - err = 0; + reason = SKB_NOT_DROPPED_YET; goto out; no_route: @@ -2399,13 +2469,10 @@ martian_destination: net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif - -e_inval: - err = -EINVAL; goto out; e_nobufs: - err = -ENOBUFS; + reason = SKB_DROP_REASON_NOMEM; goto out; martian_source: @@ -2414,8 +2481,10 @@ martian_source: } /* called with rcu_read_lock held */ -static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, struct fib_result *res) +static enum skb_drop_reason +ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, + dscp_t dscp, struct net_device *dev, + struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. * The problem was that too many Ethernet cards have broken/missing @@ -2429,12 +2498,13 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; - int err = -EINVAL; if (!in_dev) - return err; + return reason; + our = ip_check_mc_rcu(in_dev, daddr, saddr, ip_hdr(skb)->protocol); @@ -2455,27 +2525,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, IN_DEV_MFORWARD(in_dev)) #endif ) { - err = ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + reason = ip_route_input_mc(skb, daddr, saddr, dscp, + dev, our); } - return err; + return reason; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev, res); + return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res); } -int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, + __be32 saddr, dscp_t dscp, + struct net_device *dev) { + enum skb_drop_reason reason; struct fib_result res; - int err; - tos &= INET_DSCP_MASK; rcu_read_lock(); - err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); + reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res); rcu_read_unlock(); - return err; + return reason; } EXPORT_SYMBOL(ip_route_input_noref); @@ -2640,8 +2710,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, if (fl4->saddr) { if (ipv4_is_multicast(fl4->saddr) || - ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) { + ipv4_is_lbcast(fl4->saddr)) { rth = ERR_PTR(-EINVAL); goto out; } @@ -3147,7 +3216,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, struct rtmsg *rtm; int i, err; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for route get request"); return -EINVAL; @@ -3157,7 +3227,6 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb, return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); - rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_table || rtm->rtm_protocol || @@ -3223,6 +3292,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi4 fl4 = {}; __be32 dst = 0; __be32 src = 0; + dscp_t dscp; kuid_t uid; u32 iif; int err; @@ -3233,10 +3303,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; rtm = nlmsg_data(nlh); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; - iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; - mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + src = nla_get_in_addr_default(tb[RTA_SRC], 0); + dst = nla_get_in_addr_default(tb[RTA_DST], 0); + iif = nla_get_u32_default(tb[RTA_IIF], 0); + mark = nla_get_u32_default(tb[RTA_MARK], 0); + dscp = inet_dsfield_to_dscp(rtm->rtm_tos); if (tb[RTA_UID]) uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); else @@ -3261,8 +3332,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.daddr = dst; fl4.saddr = src; - fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK; - fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; + fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); + fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0); fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; if (sport) @@ -3285,9 +3356,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; - err = ip_route_input_rcu(skb, dst, src, - rtm->rtm_tos & INET_DSCP_MASK, dev, - &res); + err = ip_route_input_rcu(skb, dst, src, dscp, dev, + &res) ? -EINVAL : 0; rt = skb_rtable(skb); if (err == 0 && rt->dst.error) @@ -3634,6 +3704,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ +static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, + .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, +}; + int __init ip_rt_init(void) { void *idents_hash; @@ -3691,8 +3766,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, - RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register_many(ip_rt_rtnl_msg_handlers); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); |