summaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c330
1 files changed, 202 insertions, 128 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 723ac9181558..fccb05fb3a79 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = {
EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#ifndef CONFIG_PREEMPT_RT
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
+#else
+#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field)
+#endif
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
@@ -390,7 +394,13 @@ static inline int ip_rt_proc_init(void)
static inline bool rt_is_expired(const struct rtable *rth)
{
- return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
+ bool res;
+
+ rcu_read_lock();
+ res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev));
+ rcu_read_unlock();
+
+ return res;
}
void rt_cache_flush(struct net *net)
@@ -870,11 +880,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
- rcu_read_unlock();
net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
if (!peer) {
+ rcu_read_unlock();
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
rt_nexthop(rt, ip_hdr(skb)->daddr));
return;
@@ -893,7 +903,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
- goto out_put_peer;
+ goto out_unlock;
}
/* Check for load limit; set rate_last to the latest sent
@@ -914,8 +924,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
}
-out_put_peer:
- inet_putpeer(peer);
+out_unlock:
+ rcu_read_unlock();
}
static int ip_error(struct sk_buff *skb)
@@ -975,9 +985,9 @@ static int ip_error(struct sk_buff *skb)
break;
}
+ rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
- l3mdev_master_ifindex(skb->dev), 1);
-
+ l3mdev_master_ifindex_rcu(skb->dev));
send = true;
if (peer) {
now = jiffies;
@@ -989,8 +999,9 @@ static int ip_error(struct sk_buff *skb)
peer->rate_tokens -= ip_rt_error_cost;
else
send = false;
- inet_putpeer(peer);
}
+ rcu_read_unlock();
+
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
@@ -1001,9 +1012,9 @@ out: kfree_skb_reason(skb, reason);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
- struct net *net = dev_net(dst->dev);
struct fib_result res;
bool lock = false;
+ struct net *net;
u32 old_mtu;
if (ip_mtu_locked(dst))
@@ -1013,6 +1024,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (old_mtu < mtu)
return;
+ rcu_read_lock();
+ net = dev_net_rcu(dst->dev);
if (mtu < net->ipv4.ip_rt_min_pmtu) {
lock = true;
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
@@ -1020,17 +1033,29 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
- return;
+ goto out;
- rcu_read_lock();
if (fib_lookup(net, fl4, &res, 0) == 0) {
struct fib_nh_common *nhc;
fib_select_path(net, &res, fl4, NULL);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fib_info_num_path(res.fi) > 1) {
+ int nhsel;
+
+ for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
+ nhc = fib_info_nhc(res.fi, nhsel);
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+ jiffies + net->ipv4.ip_rt_mtu_expires);
+ }
+ goto out;
+ }
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
nhc = FIB_RES_NHC(res);
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + net->ipv4.ip_rt_mtu_expires);
}
+out:
rcu_read_unlock();
}
@@ -1263,7 +1288,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1293,10 +1318,15 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
- struct net *net = dev_net(dst->dev);
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
- net->ipv4.ip_rt_min_advmss);
+ unsigned int advmss;
+ struct net *net;
+
+ rcu_read_lock();
+ net = dev_net_rcu(dst->dev);
+ advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
+ net->ipv4.ip_rt_min_advmss);
+ rcu_read_unlock();
return min(advmss, IPV4_MAX_PMTU - header_size);
}
@@ -1665,49 +1695,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct in_device *in_dev, u32 *itag)
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
- return -EINVAL;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
- return -EINVAL;
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return SKB_DROP_REASON_INVALID_PROTO;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- return -EINVAL;
+ return SKB_DROP_REASON_IP_LOCALNET;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr) &&
ip_hdr(skb)->protocol != IPPROTO_IGMP)
- return -EINVAL;
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
} else {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, itag);
+ if (reason)
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+static enum skb_drop_reason
+ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
+ enum skb_drop_reason reason;
struct rtable *rth;
u32 itag = 0;
- int err;
- err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
- if (err)
- return err;
+ reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
+ &itag);
+ if (reason)
+ return reason;
if (our)
flags |= RTCF_LOCAL;
@@ -1718,7 +1753,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
false);
if (!rth)
- return -ENOBUFS;
+ return SKB_DROP_REASON_NOMEM;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1734,7 +1769,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
@@ -1764,11 +1799,12 @@ static void ip_handle_martian_source(struct net_device *dev,
}
/* called in rcu_read_lock() section */
-static int __mkroute_input(struct sk_buff *skb,
- const struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static enum skb_drop_reason
+__mkroute_input(struct sk_buff *skb, const struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
@@ -1782,12 +1818,13 @@ static int __mkroute_input(struct sk_buff *skb,
out_dev = __in_dev_get_rcu(dev);
if (!out_dev) {
net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
- return -EINVAL;
+ return reason;
}
- err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
+ reason = -err;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -1815,7 +1852,7 @@ static int __mkroute_input(struct sk_buff *skb,
*/
if (out_dev == in_dev &&
IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
- err = -EINVAL;
+ reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE;
goto cleanup;
}
}
@@ -1838,7 +1875,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto cleanup;
}
@@ -1852,9 +1889,9 @@ static int __mkroute_input(struct sk_buff *skb,
lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
- err = 0;
- cleanup:
- return err;
+ reason = SKB_NOT_DROPPED_YET;
+cleanup:
+ return reason;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -2004,8 +2041,12 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
hash_keys.addrs.v4addrs.dst = fl4->daddr;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
hash_keys.basic.ip_proto = fl4->flowi4_proto;
- if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
- hash_keys.ports.src = fl4->fl4_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) {
+ if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl4->fl4_sport;
+ }
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl4->fl4_dport;
@@ -2060,7 +2101,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
hash_keys.addrs.v4addrs.src = fl4->saddr;
hash_keys.addrs.v4addrs.dst = fl4->daddr;
- hash_keys.ports.src = fl4->fl4_sport;
+ if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl4->fl4_sport;
hash_keys.ports.dst = fl4->fl4_dport;
hash_keys.basic.ip_proto = fl4->flowi4_proto;
}
@@ -2112,66 +2156,72 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct flow_keys *hkeys)
+static enum skb_drop_reason
+ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
- fib_select_multipath(res, h);
+ fib_select_multipath(res, h, NULL);
IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
/* create a routing cache entry */
- return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp);
}
/* Implements all the saddr-related checks as ip_route_input_slow(),
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- const struct sk_buff *hint)
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
- int err = -EINVAL;
u32 tag = 0;
if (!in_dev)
- return -EINVAL;
+ return reason;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= INET_DSCP_MASK;
- err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
skb_dst_copy(skb, hint);
- return 0;
+ return SKB_NOT_DROPPED_YET;
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
- return err;
+ return reason;
}
/* get device for dst_alloc with local routes */
@@ -2200,10 +2250,12 @@ static struct net_device *ip_rt_get_dev(struct net *net,
* called with rcu_read_lock()
*/
-static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2231,8 +2283,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_tun_key.tun_id = 0;
skb_dst_drop(skb);
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
res->fi = NULL;
res->table = NULL;
@@ -2242,21 +2296,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/* Accept zero addresses only to limited broadcast;
* I even do not know to fix it or not. Waiting for complains :-)
*/
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(daddr))
+ if (ipv4_is_zeronet(daddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
* and call it once if daddr or/and saddr are loopback addresses
*/
if (ipv4_is_loopback(daddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_destination;
+ }
} else if (ipv4_is_loopback(saddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
}
/*
@@ -2266,7 +2328,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.daddr = daddr;
@@ -2298,10 +2360,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, tos,
- 0, dev, in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp,
+ 0, dev, in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2310,21 +2373,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res->type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
make_route:
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
-out: return err;
+ reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp,
+ flkeys);
+
+out:
+ return reason;
brd_input:
- if (skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ if (skb->protocol != htons(ETH_P_IP)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
+ goto out;
+ }
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
@@ -2342,7 +2412,7 @@ local_input:
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
}
}
@@ -2379,7 +2449,7 @@ local_input:
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
no_route:
@@ -2399,13 +2469,10 @@ martian_destination:
net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
&daddr, &saddr, dev->name);
#endif
-
-e_inval:
- err = -EINVAL;
goto out;
e_nobufs:
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto out;
martian_source:
@@ -2414,8 +2481,10 @@ martian_source:
}
/* called with rcu_read_lock held */
-static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2429,12 +2498,13 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* route cache entry is created eventually.
*/
if (ipv4_is_multicast(daddr)) {
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
- int err = -EINVAL;
if (!in_dev)
- return err;
+ return reason;
+
our = ip_check_mc_rcu(in_dev, daddr, saddr,
ip_hdr(skb)->protocol);
@@ -2455,27 +2525,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- err = ip_route_input_mc(skb, daddr, saddr,
- tos, dev, our);
+ reason = ip_route_input_mc(skb, daddr, saddr, dscp,
+ dev, our);
}
- return err;
+ return reason;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
+ enum skb_drop_reason reason;
struct fib_result res;
- int err;
- tos &= INET_DSCP_MASK;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return err;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
@@ -2640,8 +2710,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
if (fl4->saddr) {
if (ipv4_is_multicast(fl4->saddr) ||
- ipv4_is_lbcast(fl4->saddr) ||
- ipv4_is_zeronet(fl4->saddr)) {
+ ipv4_is_lbcast(fl4->saddr)) {
rth = ERR_PTR(-EINVAL);
goto out;
}
@@ -3147,7 +3216,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG(extack,
"ipv4: Invalid header for route get request");
return -EINVAL;
@@ -3157,7 +3227,6 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv4_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
rtm->rtm_table || rtm->rtm_protocol ||
@@ -3223,6 +3292,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct flowi4 fl4 = {};
__be32 dst = 0;
__be32 src = 0;
+ dscp_t dscp;
kuid_t uid;
u32 iif;
int err;
@@ -3233,10 +3303,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
rtm = nlmsg_data(nlh);
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
- mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ dst = nla_get_in_addr_default(tb[RTA_DST], 0);
+ iif = nla_get_u32_default(tb[RTA_IIF], 0);
+ mark = nla_get_u32_default(tb[RTA_MARK], 0);
+ dscp = inet_dsfield_to_dscp(rtm->rtm_tos);
if (tb[RTA_UID])
uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
else
@@ -3261,8 +3332,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
- fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
+ fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0);
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
if (sport)
@@ -3285,9 +3356,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.flowi4_iif = iif; /* for rt_fill_info */
skb->dev = dev;
skb->mark = mark;
- err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & INET_DSCP_MASK, dev,
- &res);
+ err = ip_route_input_rcu(skb, dst, src, dscp, dev,
+ &res) ? -EINVAL : 0;
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
@@ -3634,6 +3704,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
+static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE,
+ .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip_rt_init(void)
{
void *idents_hash;
@@ -3691,8 +3766,7 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register_many(ip_rt_rtnl_msg_handlers);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);