From 014894360ec95abe868e94416b3dd6569f6e2c0c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 13 May 2019 07:19:19 -0700 Subject: bpf: sockmap, only stop/flush strp if it was enabled at some point If we try to call strp_done on a parser that has never been initialized, because the sockmap user is only using TX side for example we get the following error. [ 883.422081] WARNING: CPU: 1 PID: 208 at kernel/workqueue.c:3030 __flush_work+0x1ca/0x1e0 ... [ 883.422095] Workqueue: events sk_psock_destroy_deferred [ 883.422097] RIP: 0010:__flush_work+0x1ca/0x1e0 This had been wrapped in a 'if (psock->parser.enabled)' logic which was broken because the strp_done() was never actually being called because we do a strp_stop() earlier in the tear down logic will set parser.enabled to false. This could result in a use after free if work was still in the queue and was resolved by the patch here, 1d79895aef18f ("sk_msg: Always cancel strp work before freeing the psock"). However, calling strp_stop(), done by the patch marked in the fixes tag, only is useful if we never initialized a strp parser program and never initialized the strp to start with. Because if we had initialized a stream parser strp_stop() would have been called by sk_psock_drop() earlier in the tear down process. By forcing the strp to stop we get past the WARNING in strp_done that checks the stopped flag but calling cancel_work_sync on work that has never been initialized is also wrong and generates the warning above. To fix check if the parser program exists. If the program exists then the strp work has been initialized and must be sync'd and cancelled before free'ing any structures. If no program exists we never initialized the stream parser in the first place so skip the sync/cancel logic implemented by strp_done. Finally, remove the strp_done its not needed and in the case where we are using the stream parser has already been called. Fixes: e8e3437762ad9 ("bpf: Stop the psock parser before canceling its work") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cc94d921476c..49d1efa329d7 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,8 +554,10 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ - strp_stop(&psock->parser.strp); - strp_done(&psock->parser.strp); + + /* Parser has been stopped */ + if (psock->progs.skb_parser) + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit v1.2.3 From c42253cc88206fd0e9868c8b2fd7f9e79f9e0e03 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 13 May 2019 07:19:37 -0700 Subject: bpf: sockmap remove duplicate queue free In tcp bpf remove we free the cork list and purge the ingress msg list. However we do this before the ref count reaches zero so it could be possible some other access is in progress. In this case (tcp close and/or tcp_unhash) we happen to also hold the sock lock so no path exists but lets fix it otherwise it is extremely fragile and breaks the reference counting rules. Also we already check the cork list and ingress msg queue and free them once the ref count reaches zero so its wasteful to check twice. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/ipv4/tcp_bpf.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 1bb7321a256d..4a619c85daed 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -528,8 +528,6 @@ static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) { struct sk_psock_link *link; - sk_psock_cork_free(psock); - __sk_psock_purge_ingress_msg(psock); while ((link = sk_psock_link_pop(psock))) { sk_psock_unlink(sk, link); sk_psock_free_link(link); -- cgit v1.2.3 From cabede8b4f2b746232aa25730a0b752de1cb82ca Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 13 May 2019 07:19:55 -0700 Subject: bpf: sockmap fix msg->sg.size account on ingress skb When converting a skb to msg->sg we forget to set the size after the latest ktls/tls code conversion. This patch can be reached by doing a redir into ingress path from BPF skb sock recv hook. Then trying to read the size fails. Fix this by setting the size. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 49d1efa329d7..93bffaad2135 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -411,6 +411,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) sk_mem_charge(sk, skb->len); copied = skb->len; msg->sg.start = 0; + msg->sg.size = copied; msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge; msg->skb = skb; -- cgit v1.2.3 From 5fa2ca7c4a3fc176f31b495e1a704862d8188b53 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 13 May 2019 21:42:03 -0700 Subject: bpf, tcp: correctly handle DONT_WAIT flags and timeo == 0 The tcp_bpf_wait_data() routine needs to check timeo != 0 before calling sk_wait_event() otherwise we may see unexpected stalls on receiver. Arika did all the leg work here I just formatted, posted and ran a few tests. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: Arika Chen Suggested-by: Arika Chen Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- net/ipv4/tcp_bpf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 4a619c85daed..3d1e15401384 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -27,7 +27,10 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, int flags, long timeo, int *err) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret; + int ret = 0; + + if (!timeo) + return ret; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); -- cgit v1.2.3