From 469cb3bb42d13272b66b6c23ffd5057694bd4add Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 10 Aug 2020 08:16:58 -0400 Subject: net: Fix potential memory leak in proto_register() [ Upstream commit 0f5907af39137f8183ed536aaa00f322d7365130 ] If we failed to assign proto idx, we free the twsk_slab_name but forget to free the twsk_slab. Add a helper function tw_prot_cleanup() to free these together and also use this helper function in proto_unregister(). Fixes: b45ce32135d1 ("sock: fix potential memory leak in proto_register()") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/sock.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index 2e5b7870e5d3..a14a8cb6ccca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3443,6 +3443,16 @@ static void sock_inuse_add(struct net *net, int val) } #endif +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) +{ + if (!twsk_prot) + return; + kfree(twsk_prot->twsk_slab_name); + twsk_prot->twsk_slab_name = NULL; + kmem_cache_destroy(twsk_prot->twsk_slab); + twsk_prot->twsk_slab = NULL; +} + static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) @@ -3513,7 +3523,7 @@ int proto_register(struct proto *prot, int alloc_slab) prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } } @@ -3521,15 +3531,15 @@ int proto_register(struct proto *prot, int alloc_slab) ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; -out_free_timewait_sock_slab_name: +out_free_timewait_sock_slab: if (alloc_slab && prot->twsk_prot) - kfree(prot->twsk_prot->twsk_slab_name); + tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); @@ -3553,12 +3563,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); - - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(prot->twsk_prot->twsk_slab_name); - prot->twsk_prot->twsk_slab = NULL; - } + tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); -- cgit v1.2.3 From 2559f40308f35a62fb809c047adbabf13a9390cd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 9 Jun 2020 16:11:29 -0700 Subject: net/compat: Add missing sock updates for SCM_RIGHTS commit d9539752d23283db4692384a634034f451261e29 upstream. Add missed sock updates to compat path via a new helper, which will be used more in coming patches. (The net/core/scm.c code is left as-is here to assist with -stable backports for the compat path.) Cc: Christoph Hellwig Cc: Sargun Dhillon Cc: Jakub Kicinski Cc: stable@vger.kernel.org Fixes: 48a87cc26c13 ("net: netprio: fd passed in SCM_RIGHTS datagram not set correctly") Fixes: d84295067fc7 ("net: net_cls: fd passed in SCM_RIGHTS datagram not set correctly") Acked-by: Christian Brauner Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/net/sock.h | 4 ++++ net/compat.c | 1 + net/core/sock.c | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+) (limited to 'net/core') diff --git a/include/net/sock.h b/include/net/sock.h index 1183507df95b..d05a2c3ed3a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -891,6 +891,8 @@ static inline int sk_memalloc_socks(void) { return static_branch_unlikely(&memalloc_socks_key); } + +void __receive_sock(struct file *file); #else static inline int sk_memalloc_socks(void) @@ -898,6 +900,8 @@ static inline int sk_memalloc_socks(void) return 0; } +static inline void __receive_sock(struct file *file) +{ } #endif static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) diff --git a/net/compat.c b/net/compat.c index 434838bef5f8..7dc670c8eac5 100644 --- a/net/compat.c +++ b/net/compat.c @@ -309,6 +309,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ + __receive_sock(fp[i]); fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index a14a8cb6ccca..78f8736be9c5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2842,6 +2842,27 @@ int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct * } EXPORT_SYMBOL(sock_no_mmap); +/* + * When a file is received (via SCM_RIGHTS, etc), we must bump the + * various sock-based usage counts. + */ +void __receive_sock(struct file *file) +{ + struct socket *sock; + int error; + + /* + * The resulting value of "error" is ignored here since we only + * need to take action when the file is a socket and testing + * "sock" for NULL is sufficient. + */ + sock = sock_from_file(file, &error); + if (sock) { + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); + } +} + ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { ssize_t res; -- cgit v1.2.3 From cd4644d904e1d153d516e73e2e127e7a2fe687e1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:37 -0700 Subject: bpf: sock_ops ctx access may stomp registers in corner case [ Upstream commit fd09af010788a884de1c39537c288830c3d305db ] I had a sockmap program that after doing some refactoring started spewing this splat at me: [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [...] [18610.807359] Call Trace: [18610.807370] ? 0xffffffffc114d0d5 [18610.807382] __cgroup_bpf_run_filter_sock_ops+0x7d/0xb0 [18610.807391] tcp_connect+0x895/0xd50 [18610.807400] tcp_v4_connect+0x465/0x4e0 [18610.807407] __inet_stream_connect+0xd6/0x3a0 [18610.807412] ? __inet_stream_connect+0x5/0x3a0 [18610.807417] inet_stream_connect+0x3b/0x60 [18610.807425] __sys_connect+0xed/0x120 After some debugging I was able to build this simple reproducer, __section("sockops/reproducer_bad") int bpf_reproducer_bad(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; return 0; } And along the way noticed that below program ran without splat, __section("sockops/reproducer_good") int bpf_reproducer_good(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; volatile __maybe_unused __u32 family; compiler_barrier(); family = skops->family; return 0; } So I decided to check out the code we generate for the above two programs and noticed each generates the BPF code you would expect, 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r1 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r1 ; return 0; 2: r0 = 0 3: exit 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r2 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r2 ; family = skops->family; 2: r1 = *(u32 *)(r1 + 20) 3: *(u32 *)(r10 - 8) = r1 ; return 0; 4: r0 = 0 5: exit So we get reasonable assembly, but still something was causing the null pointer dereference. So, we load the programs and dump the xlated version observing that line 0 above 'r* = *(u32 *)(r1 +96)' is going to be translated by the skops access helpers. int bpf_reproducer_bad(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) 3: (61) r1 = *(u32 *)(r1 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r1 ; return 0; 5: (b7) r0 = 0 6: (95) exit int bpf_reproducer_good(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r2 = *(u32 *)(r1 +28) 1: (15) if r2 == 0x0 goto pc+2 2: (79) r2 = *(u64 *)(r1 +0) 3: (61) r2 = *(u32 *)(r2 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r2 ; family = skops->family; 5: (79) r1 = *(u64 *)(r1 +0) 6: (69) r1 = *(u16 *)(r1 +16) ; family = skops->family; 7: (63) *(u32 *)(r10 -8) = r1 ; return 0; 8: (b7) r0 = 0 9: (95) exit Then we look at lines 0 and 2 above. In the good case we do the zero check in r2 and then load 'r1 + 0' at line 2. Do a quick cross-check into the bpf_sock_ops check and we can confirm that is the 'struct sock *sk' pointer field. But, in the bad case, 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) Oh no, we read 'r1 +28' into r1, this is skops->fullsock and then in line 2 we read the 'r1 +0' as a pointer. Now jumping back to our spat, [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 The 0x01 makes sense because that is exactly the fullsock value. And its not a valid dereference so we splat. To fix we need to guard the case when a program is doing a sock_ops field access with src_reg == dst_reg. This is already handled in the load case where the ctx_access handler uses a tmp register being careful to store the old value and restore it. To fix the get case test if src_reg == dst_reg and in this case do the is_fullsock test in the temporary register. Remembering to restore the temporary register before writing to either dst_reg or src_reg to avoid smashing the pointer into the struct holding the tmp variable. Adding this inline code to test_tcpbpf_kern will now be generated correctly from, 9: r2 = *(u32 *)(r2 + 96) to xlated code, 12: (7b) *(u64 *)(r2 +32) = r9 13: (61) r9 = *(u32 *)(r2 +28) 14: (15) if r9 == 0x0 goto pc+4 15: (79) r9 = *(u64 *)(r2 +32) 16: (79) r2 = *(u64 *)(r2 +0) 17: (61) r2 = *(u32 *)(r2 +2348) 18: (05) goto pc+1 19: (79) r9 = *(u64 *)(r2 +32) And in the normal case we keep the original code, because really this is an edge case. From this, 9: r2 = *(u32 *)(r6 + 96) to xlated code, 22: (61) r2 = *(u32 *)(r6 +28) 23: (15) if r2 == 0x0 goto pc+2 24: (79) r2 = *(u64 *)(r6 +0) 25: (61) r2 = *(u32 *)(r2 +2348) So three additional instructions if dst == src register, but I scanned my current code base and did not see this pattern anywhere so should not be a big deal. Further, it seems no one else has hit this or at least reported it so it must a fairly rare pattern. Fixes: 9b1f3d6e5af29 ("bpf: Refactor sock_ops_convert_ctx_access") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718347772.4728.2781381670567919577.stgit@john-Precision-5820-Tower Signed-off-by: Sasha Levin --- net/core/filter.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 82e1b5b06167..09286a1f7457 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8249,15 +8249,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, /* Helper macro for adding read access to tcp_sock or sock fields. */ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ - si->dst_reg, si->src_reg, \ + fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ si->dst_reg, si->src_reg, \ @@ -8266,6 +8282,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, OBJ_FIELD), \ si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ } while (0) #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ -- cgit v1.2.3 From db7f8c57dbdd31f7e59f8dc8d1e1b38607a320ef Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:56 -0700 Subject: bpf: sock_ops sk access may stomp registers when dst_reg = src_reg [ Upstream commit 84f44df664e9f0e261157e16ee1acd77cc1bb78d ] Similar to patch ("bpf: sock_ops ctx access may stomp registers") if the src_reg = dst_reg when reading the sk field of a sock_ops struct we generate xlated code, 53: (61) r9 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 56: (79) r9 = *(u64 *)(r9 +0) This stomps on the r9 reg to do the sk_fullsock check and then when reading the skops->sk field instead of the sk pointer we get the sk_fullsock. To fix use similar pattern noted in the previous fix and use the temp field to save/restore a register used to do sk_fullsock check. After the fix the generated xlated code reads, 52: (7b) *(u64 *)(r9 +32) = r8 53: (61) r8 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 55: (79) r8 = *(u64 *)(r9 +32) 56: (79) r9 = *(u64 *)(r9 +0) 57: (05) goto pc+1 58: (79) r8 = *(u64 *)(r9 +32) Here r9 register was in-use so r8 is chosen as the temporary register. In line 52 r8 is saved in temp variable and at line 54 restored in case fullsock != 0. Finally we handle fullsock == 0 case by restoring at line 58. This adds a new macro SOCK_OPS_GET_SK it is almost possible to merge this with SOCK_OPS_GET_FIELD, but I found the extra branch logic a bit more confusing than just adding a new macro despite a bit of duplicating code. Fixes: 1314ef561102e ("bpf: export bpf_sock for BPF_PROG_TYPE_SOCK_OPS prog type") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718349653.4728.6559437186853473612.stgit@john-Precision-5820-Tower Signed-off-by: Sasha Levin --- net/core/filter.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 09286a1f7457..a69e79327c29 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8290,6 +8290,43 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, } \ } while (0) +#define SOCK_OPS_GET_SK() \ + do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + fullsock_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ + } while (0) + #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) @@ -8574,17 +8611,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); break; case offsetof(struct bpf_sock_ops, sk): - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, - is_fullsock), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, - is_fullsock)); - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, sk), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, sk)); + SOCK_OPS_GET_SK(); break; } return insn - insn_buf; -- cgit v1.2.3 From a75f8a60c415325c7ce3df90bfe1b782f84b78cf Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 15 Aug 2020 04:44:31 -0400 Subject: net: Fix potential wrong skb->protocol in skb_vlan_untag() [ Upstream commit 55eff0eb7460c3d50716ed9eccf22257b046ca92 ] We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). So we should pull VLAN_HLEN + sizeof(unsigned short) in skb_vlan_untag() or we may access the wrong data. Fixes: 0d5501c1c828 ("net: Always untag vlan-tagged traffic on input.") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8afefe6f6b6..7afe52bd038b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5419,8 +5419,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */ + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short)))) goto err_free; vhdr = (struct vlan_hdr *)skb->data; -- cgit v1.2.3