summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWei Wang <weiwan@google.com>2019-10-16 22:03:15 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-02-10 11:12:08 +0300
commit43ebd30881d4b3b95da6a4cc5a58f8c594d0d550 (patch)
tree2ba5e72fac1f751a791733864b7c55e1682d2db9
parent347cf0e6266a327ee26eb43a01ab5b02d8a1a839 (diff)
downloadlinux-43ebd30881d4b3b95da6a4cc5a58f8c594d0d550.tar.xz
ipv4: fix race condition between route lookup and invalidation
[ upstream commit 5018c59607a511cdee743b629c76206d9c9e6d7b ] Jesse and Ido reported the following race condition: <CPU A, t0> - Received packet A is forwarded and cached dst entry is taken from the nexthop ('nhc->nhc_rth_input'). Calls skb_dst_set() <t1> - Given Jesse has busy routers ("ingesting full BGP routing tables from multiple ISPs"), route is added / deleted and rt_cache_flush() is called <CPU B, t2> - Received packet B tries to use the same cached dst entry from t0, but rt_cache_valid() is no longer true and it is replaced in rt_cache_route() by the newer one. This calls dst_dev_put() on the original dst entry which assigns the blackhole netdev to 'dst->dev' <CPU A, t3> - dst_input(skb) is called on packet A and it is dropped due to 'dst->dev' being the blackhole netdev There are 2 issues in the v4 routing code: 1. A per-netns counter is used to do the validation of the route. That means whenever a route is changed in the netns, users of all routes in the netns needs to redo lookup. v6 has an implementation of only updating fn_sernum for routes that are affected. 2. When rt_cache_valid() returns false, rt_cache_route() is called to throw away the current cache, and create a new one. This seems unnecessary because as long as this route does not change, the route cache does not need to be recreated. To fully solve the above 2 issues, it probably needs quite some code changes and requires careful testing, and does not suite for net branch. So this patch only tries to add the deleted cached rt into the uncached list, so user could still be able to use it to receive packets until it's done. Fixes: 95c47f9cf5e0 ("ipv4: call dst_dev_put() properly") Signed-off-by: Wei Wang <weiwan@google.com> Reported-by: Ido Schimmel <idosch@idosch.org> Reported-by: Jesse Hathaway <jesse@mbuki-mvuki.org> Tested-by: Jesse Hathaway <jesse@mbuki-mvuki.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Cc: David Ahern <dsahern@gmail.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Carsten Schmid <carsten_schmid@mentor.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--net/ipv4/route.c38
1 files changed, 19 insertions, 19 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be7383d139c1..78d6bc61a1d8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1437,6 +1437,24 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
return ret;
}
+struct uncached_list {
+ spinlock_t lock;
+ struct list_head head;
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
+{
+ struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
+
+ rt->rt_uncached_list = ul;
+
+ spin_lock_bh(&ul->lock);
+ list_add_tail(&rt->rt_uncached, &ul->head);
+ spin_unlock_bh(&ul->lock);
+}
+
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
struct rtable *orig, *prev, **p;
@@ -1456,7 +1474,7 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
prev = cmpxchg(p, orig, rt);
if (prev == orig) {
if (orig) {
- dst_dev_put(&orig->dst);
+ rt_add_uncached_list(orig);
dst_release(&orig->dst);
}
} else {
@@ -1467,24 +1485,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
return ret;
}
-struct uncached_list {
- spinlock_t lock;
- struct list_head head;
-};
-
-static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
-
-static void rt_add_uncached_list(struct rtable *rt)
-{
- struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
-
- rt->rt_uncached_list = ul;
-
- spin_lock_bh(&ul->lock);
- list_add_tail(&rt->rt_uncached, &ul->head);
- spin_unlock_bh(&ul->lock);
-}
-
static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);