From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: dccp: fix use-after-free in dccp_feat_activate_values Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/dccp/minisocks.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e267e6f4c9a5..abd07a443219 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; - - return inet_csk_complete_hashdance(sk, child, req, own_req); + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; -- cgit v1.2.3 From 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Fri, 10 Mar 2017 16:40:33 +1100 Subject: dccp/tcp: fix routing redirect race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Eric Dumazet pointed out this also needs to be fixed in IPv6. v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. We have seen a few incidents lately where a dst_enty has been freed with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that dst_entry. If the conditions/timings are right a crash then ensues when the freed dst_entry is referenced later on. A Common crashing back trace is: #8 [] page_fault at ffffffff8163e648 [exception RIP: __tcp_ack_snd_check+74] . . #9 [] tcp_rcv_established at ffffffff81580b64 #10 [] tcp_v4_do_rcv at ffffffff8158b54a #11 [] tcp_v4_rcv at ffffffff8158cd02 #12 [] ip_local_deliver_finish at ffffffff815668f4 #13 [] ip_local_deliver at ffffffff81566bd9 #14 [] ip_rcv_finish at ffffffff8156656d #15 [] ip_rcv at ffffffff81566f06 #16 [] __netif_receive_skb_core at ffffffff8152b3a2 #17 [] __netif_receive_skb at ffffffff8152b608 #18 [] netif_receive_skb at ffffffff8152b690 #19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] #20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] #21 [] net_rx_action at ffffffff8152bac2 #22 [] __do_softirq at ffffffff81084b4f #23 [] call_softirq at ffffffff8164845c #24 [] do_softirq at ffffffff81016fc5 #25 [] irq_exit at ffffffff81084ee5 #26 [] do_IRQ at ffffffff81648ff8 Of course it may happen with other NIC drivers as well. It's found the freed dst_entry here: 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ 225 {↩ 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ 228 ↩ 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ 231 }↩ But there are other backtraces attributed to the same freed dst_entry in netfilter code as well. All the vmcores showed 2 significant clues: - Remote hosts behind the default gateway had always been redirected to a different gateway. A rtable/dst_entry will be added for that host. Making more dst_entrys with lower reference counts. Making this more probable. - All vmcores showed a postitive LockDroppedIcmps value, e.g: LockDroppedIcmps 267 A closer look at the tcp_v4_err() handler revealed that do_redirect() will run regardless of whether user space has the socket locked. This can result in a race condition where the same dst_entry cached in sk->sk_dst_entry can be decremented twice for the same socket via: do_redirect()->__sk_dst_check()-> dst_release(). Which leads to the dst_entry being prematurely freed with another socket pointing to it via sk->sk_dst_cache and a subsequent crash. To fix this skip do_redirect() if usespace has the socket locked. Instead let the redirect take place later when user space does not have the socket locked. The dccp/IPv6 code is very similar in this respect, so fixing it there too. As Eric Garver pointed out the following commit now invalidates routes. Which can set the dst->obsolete flag so that ipv4_dst_check() returns null and triggers the dst_release(). Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") Cc: Eric Garver Cc: Hannes Sowa Signed-off-by: Jon Maxwell Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 409d0cfd3447..b99168b0fabf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 233b57367758..d9b6a4e403e7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f3ec1365497..575e19dcc017 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -431,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60a5295a7de6..49fa2e8c3fa9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } -- cgit v1.2.3 From 72ef9c4125c7b257e3a714d62d778ab46583d6a3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:01:30 +0100 Subject: dccp: fix memory leak during tear-down of unsuccessful connection request This patch fixes a memory leak, which happens if the connection request is not fulfilled between parsing the DCCP options and handling the SYN (because e.g. the backlog is full), because we forgot to free the list of ack vectors. Reported-by: Jianwen Ji Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/dccp') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3