From 864d9664245565a6b9df86a68c7664a25a4fcd58 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 21 Jul 2017 18:49:45 +0200 Subject: net/socket: fix type in assignment and trim long line The commit ffb07550c76f ("copy_msghdr_from_user(): get rid of field-by-field copyin") introduce a new sparse warning: net/socket.c:1919:27: warning: incorrect type in assignment (different address spaces) net/socket.c:1919:27: expected void *msg_control net/socket.c:1919:27: got void [noderef] *[addressable] msg_control and a line above 80 chars, let's fix them Fixes: ffb07550c76f ("copy_msghdr_from_user(): get rid of field-by-field copyin") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/socket.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index bf2122691fba..ad22df1ffbd1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1916,7 +1916,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; - kmsg->msg_control = msg.msg_control; + kmsg->msg_control = (void __force *)msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; @@ -1935,7 +1935,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (msg.msg_name && kmsg->msg_namelen) { if (!save_addr) { - err = move_addr_to_kernel(msg.msg_name, kmsg->msg_namelen, + err = move_addr_to_kernel(msg.msg_name, + kmsg->msg_namelen, kmsg->msg_name); if (err < 0) return err; -- cgit v1.2.3 From 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 23 Jul 2017 17:52:23 +0800 Subject: openvswitch: fix potential out of bound access in parse_ct Before the 'type' is validated, we shouldn't use it to fetch the ovs_ct_attr_lens's minlen and maxlen, else, out of bound access may happen. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Liping Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e3c4c6c3fef7..03859e386b47 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1310,8 +1310,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, nla_for_each_nested(a, attr, rem) { int type = nla_type(a); - int maxlen = ovs_ct_attr_lens[type].maxlen; - int minlen = ovs_ct_attr_lens[type].minlen; + int maxlen; + int minlen; if (type > OVS_CT_ATTR_MAX) { OVS_NLERR(log, @@ -1319,6 +1319,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, type, OVS_CT_ATTR_MAX); return -EINVAL; } + + maxlen = ovs_ct_attr_lens[type].maxlen; + minlen = ovs_ct_attr_lens[type].minlen; if (nla_len(a) < minlen || nla_len(a) > maxlen) { OVS_NLERR(log, "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", -- cgit v1.2.3 From c800aaf8d869f2b9b47b10c5c312fe19f0a94042 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 24 Jul 2017 10:07:32 -0700 Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired() There are multiple reports showing we have a use-after-free in the timer prb_retire_rx_blk_timer_expired(), where we use struct tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by free_pg_vec(). The interesting part is it is not freed via packet_release() but via packet_setsockopt(), which means we are not closing the socket. Looking into the big and fat function packet_set_ring(), this could happen if we satisfy the following conditions: 1. closing == 0, not on packet_release() path 2. req->tp_block_nr == 0, we don't allocate a new pg_vec 3. rx_ring->pg_vec is already set as V3, which means we already called packet_set_ring() wtih req->tp_block_nr > 0 previously 4. req->tp_frame_nr == 0, pass sanity check 5. po->mapped == 0, never called mmap() In this scenario we are clearing the old rx_ring->pg_vec, so we need to free this pg_vec, but we don't stop the timer on this path because of closing==0. The timer has to be stopped as long as we need to free pg_vec, therefore the check on closing!=0 is wrong, we should check pg_vec!=NULL instead. Thanks to liujian for testing different fixes. Reported-by: alexander.levin@verizon.com Reported-by: Dave Jones Reported-by: liujian (CE) Tested-by: liujian (CE) Cc: Ding Tianhong Cc: Willem de Bruijn Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 008bb34ee324..0615c2a950fa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4329,7 +4329,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - if (closing && (po->tp_version > TPACKET_V2)) { + if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); -- cgit v1.2.3 From 9f9e772da2db279c8d8c9206d271b8009c9093f4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Jul 2017 10:49:23 -0700 Subject: net: dsa: Initialize ds->cpu_port_mask earlier The mt7530 driver has its dsa_switch_ops::get_tag_protocol function check ds->cpu_port_mask to issue a warning in case the configured CPU port is not capable of supporting tags. After commit 14be36c2c96c ("net: dsa: Initialize all CPU and enabled ports masks in dsa_ds_parse()") we slightly re-arranged the initialization such that this was no longer working. Just make sure that ds->cpu_port_mask is set prior to the first call to get_tag_protocol, thus restoring the expected contract. In case of error, the CPU port bit is cleared. Fixes: 14be36c2c96c ("net: dsa: Initialize all CPU and enabled ports masks in dsa_ds_parse()") Reported-by: Sean Wang Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 56e46090526b..c442051d5a55 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -509,21 +509,22 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->netdev = ethernet_dev; } + /* Initialize cpu_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + ds->cpu_port_mask |= BIT(index); + tag_protocol = ds->ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); + ds->cpu_port_mask &= ~BIT(index); return PTR_ERR(dst->tag_ops); } dst->rcv = dst->tag_ops->rcv; - /* Initialize cpu_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - ds->cpu_port_mask |= BIT(index); - return 0; } -- cgit v1.2.3 From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 25 Jul 2017 17:57:47 +0200 Subject: udp: preserve head state for IP_CMSG_PASSSEC Paul Moore reported a SELinux/IP_PASSSEC regression caused by missing skb->sp at recvmsg() time. We need to preserve the skb head state to process the IP_CMSG_PASSSEC cmsg. With this commit we avoid releasing the skb head state in the BH even if a secpath is attached to the current skb, and stores the skb status (with/without head states) in the scratch area, so that we can access it at skb deallocation time, without incurring in cache-miss penalties. This also avoids misusing the skb CB for ipv6 packets, as introduced by the commit 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing"). Clean a bit the scratch area helpers implementation, to reduce the code differences between 32 and 64 bits build. Reported-by: Paul Moore Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue") Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") Signed-off-by: Paolo Abeni Tested-by: Paul Moore Signed-off-by: David S. Miller --- include/net/udp.h | 33 ++++++++++++++++++++++----------- net/ipv4/udp.c | 38 ++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/include/net/udp.h b/include/net/udp.h index 972ce4baab6b..56ce2d2a612d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ -#if BITS_PER_LONG == 64 - -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on - * cold cache lines at recvmsg time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ struct udp_dev_scratch { - u32 truesize; + /* skb->truesize and the stateless bit are embedded in a single field; + * do not use a bitfield since the compiler emits better/smaller code + * this way + */ + u32 _tsize_state; + +#if BITS_PER_LONG == 64 + /* len and the bit needed to compute skb_csum_unnecessary + * will be on cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been + * already validated and pulled. + */ u16 len; bool is_linear; bool csum_unnecessary; +#endif }; +static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb) +{ + return (struct udp_dev_scratch *)&skb->dev_scratch; +} + +#if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; + return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; + return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; + return udp_skb_scratch(skb)->is_linear; } #else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b057653ceca9..d243772f6efc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1163,34 +1163,32 @@ out: return ret; } -#if BITS_PER_LONG == 64 +#define UDP_SKB_IS_STATELESS 0x80000000 + static void udp_set_dev_scratch(struct sk_buff *skb) { - struct udp_dev_scratch *scratch; + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; - scratch->truesize = skb->truesize; + scratch->_tsize_state = skb->truesize; +#if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); +#endif + if (likely(!skb->_skb_refdst)) + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static int udp_skb_truesize(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; -} -#else -static void udp_set_dev_scratch(struct sk_buff *skb) -{ - skb->dev_scratch = skb->truesize; + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } -static int udp_skb_truesize(struct sk_buff *skb) +static bool udp_skb_has_head_state(struct sk_buff *skb) { - return skb->dev_scratch; + return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } -#endif /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, @@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } - /* we cleared the head states previously only if the skb lacks any IP - * options, see __udp_queue_rcv_skb(). + /* In the more common cases we cleared the head states previously, + * see __udp_queue_rcv_skb(). */ - if (unlikely(IPCB(skb)->opt.optlen > 0)) + if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); consume_stateless_skb(skb); } @@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we need skb->dst to process IP options-related - * cmsg, elsewhere can we clear all pending head states while they are - * hot in the cache + /* At recvmsg() time we may access skb->dst or skb->sp depending on + * the IP options and the cmsg flags, elsewhere can we clear all + * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp)) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); -- cgit v1.2.3 From afce615aaabfbaad02550e75c0bec106dafa1adf Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Mon, 24 Jul 2017 23:14:28 +0200 Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment() RFC 2465 defines ipv6IfStatsOutFragFails as: "The number of IPv6 datagrams that have been discarded because they needed to be fragmented at this output interface but could not be." The existing implementation, instead, would increase the counter twice in case we fail to allocate room for single fragments: once for the fragment, once for the datagram. This didn't look intentional though. In one of the two affected affected failure paths, the double increase was simply a result of a new 'goto fail' statement, introduced to avoid a skb leak. The other path appears to be affected since at least 2.6.12-rc2. Reported-by: Sabrina Dubroca Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1422d6c08377..162efba0d0cd 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -789,8 +787,6 @@ slow_path: frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + hroom + troom, GFP_ATOMIC); if (!frag) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } -- cgit v1.2.3 From 9688f9b020263c4a17471d09bb18e34eccc1c0a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 26 Jul 2017 11:33:49 +0200 Subject: udp: unbreak build lacking CONFIG_XFRM We must use pre-processor conditional block or suitable accessors to manipulate skb->sp elsewhere builds lacking the CONFIG_XFRM will break. Fixes: dce4551cb2ad ("udp: preserve head state for IP_CMSG_PASSSEC") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d243772f6efc..fac7cb9e3b0f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1786,7 +1786,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * the IP options and the cmsg flags, elsewhere can we clear all * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb))) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); -- cgit v1.2.3 From 0c3a8f8b8fabff4f3ad2dd7b95ae0e90cdd1aebb Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 25 Jul 2017 11:36:25 -0700 Subject: netpoll: Fix device name check in netpoll_setup() Apparently netpoll_setup() assumes that netpoll.dev_name is a pointer when checking if the device name is set: if (np->dev_name) { ... However the field is a character array, therefore the condition always yields true. Check instead whether the first byte of the array has a non-zero value. Signed-off-by: Matthias Kaehlcke Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8357f164c660..912731bed7b7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -666,7 +666,7 @@ int netpoll_setup(struct netpoll *np) int err; rtnl_lock(); - if (np->dev_name) { + if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; ndev = __dev_get_by_name(net, np->dev_name); } -- cgit v1.2.3 From 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:19:09 +0800 Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly In dccp_v6_conn_request, after reqsk gets alloced and hashed into ehash table, reqsk's refcnt is set 3. one is for req->rsk_timer, one is for hlist, and the other one is for current using. The problem is when dccp_v6_conn_request returns and finishes using reqsk, it doesn't put reqsk. This will cause reqsk refcnt leaks and reqsk obj never gets freed. Jianlin found this issue when running dccp_memleak.c in a loop, the system memory would run out. dccp_memleak.c: int s1 = socket(PF_INET6, 6, IPPROTO_IP); bind(s1, &sa1, 0x20); listen(s1, 0x9); int s2 = socket(PF_INET6, 6, IPPROTO_IP); connect(s2, &sa1, 0x20); close(s1); close(s2); This patch is to put the reqsk before dccp_v6_conn_request returns, just as what tcp_conn_request does. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c376af5bfdfb..1b58eac8aad3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: -- cgit v1.2.3 From b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:19:46 +0800 Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue exists on dccp_ipv4. This patch is to fix it for dccp_ipv4. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f85d901f4e3f..1b202f16531f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: -- cgit v1.2.3 From e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 26 Jul 2017 14:20:15 +0800 Subject: dccp: fix a memleak for dccp_feat_init err process In dccp_feat_init, when ccid_get_builtin_ccids failsto alloc memory for rx.val, it should free tx.val before returning an error. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/dccp/feat.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 1704948e6a12..f227f002c73d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) * singleton values (which always leads to failure). * These settings can still (later) be overridden via sockopts. */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len) || - ccid_get_builtin_ccids(&rx.val, &rx.len)) + if (ccid_get_builtin_ccids(&tx.val, &tx.len)) return -ENOBUFS; + if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { + kfree(tx.val); + return -ENOBUFS; + } if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) -- cgit v1.2.3 From 0254e0c632bfe4a0cf610e2d90397474144f00d2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 26 Jul 2017 15:22:06 -0700 Subject: net: check dev->addr_len for dev_set_mac_address() Historically, dev_ifsioc() uses struct sockaddr as mac address definition, this is why dev_set_mac_address() accepts a struct sockaddr pointer as input but now we have various types of mac addresse whose lengths are up to MAX_ADDR_LEN, longer than struct sockaddr, and saved in dev->addr_len. It is too late to fix dev_ifsioc() due to API compatibility, so just reject those larger than sizeof(struct sockaddr), otherwise we would read and use some random bytes from kernel stack. Fortunately, only a few IPv6 tunnel devices have addr_len larger than sizeof(struct sockaddr) and they don't support ndo_set_mac_addr(). But with team driver, in lb mode, they can still be enslaved to a team master and make its mac addr length as the same. Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 06b147d7d9e2..709a4e6fb447 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -263,6 +263,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_set_mtu(dev, ifr->ifr_mtu); case SIOCSIFHWADDR: + if (dev->addr_len > sizeof(struct sockaddr)) + return -EINVAL; return dev_set_mac_address(dev, &ifr->ifr_hwaddr); case SIOCSIFHWBROADCAST: -- cgit v1.2.3 From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Jul 2017 14:45:09 +0200 Subject: udp6: fix socket leak on early demux When an early demuxed packet reaches __udp6_lib_lookup_skb(), the sk reference is retrieved and used, but the relevant reference count is leaked and the socket destructor is never called. Beyond leaking the sk memory, if there are pending UDP packets in the receive queue, even the related accounted memory is leaked. In the long run, this will cause persistent forward allocation errors and no UDP skbs (both ipv4 and ipv6) will be able to reach the user-space. Fix this by explicitly accessing the early demux reference before the lookup, and properly decreasing the socket reference count after usage. Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and the now obsoleted comment about "socket cache". The newly added code is derived from the current ipv4 code for the similar path. v1 -> v2: fixed the __udp6_lib_rcv() return code for resubmission, as suggested by Eric Reported-by: Sam Edwards Reported-by: Marc Haber Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 27 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/udp.h b/include/net/udp.h index 56ce2d2a612d..cc8036987dcb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fac7cb9e3b0f..e6276fa3750b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1928,7 +1928,7 @@ drop: /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ -static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; @@ -1937,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) dst_release(old); } } +EXPORT_SYMBOL(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4a3e65626e8b..98fe4560e24c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk; - sk = skb_steal_sock(skb); - if (unlikely(sk)) - return sk; return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), udptable, skb); @@ -804,6 +800,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp6_csum_init(skb, uh, proto)) goto csum_error; + /* Check if the socket is already available, e.g. due to early demux */ + sk = skb_steal_sock(skb); + if (sk) { + struct dst_entry *dst = skb_dst(skb); + int ret; + + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); + + ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); + + /* a return value > 0 means to resubmit the input */ + if (ret > 0) + return ret; + return 0; + } + /* * Multicast receive code */ @@ -812,11 +826,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); /* Unicast */ - - /* - * check socket cache ... must talk to Alan about his plans - * for sock caches... i'll skip this for now. - */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { int ret; -- cgit v1.2.3 From efe967cdec32af93e15839cc639695ec5f637771 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jul 2017 16:41:37 +0200 Subject: tcp: avoid bogus gcc-7 array-bounds warning When using CONFIG_UBSAN_SANITIZE_ALL, the TCP code produces a false-positive warning: net/ipv4/tcp_output.c: In function 'tcp_connect': net/ipv4/tcp_output.c:2207:40: error: array subscript is below array bounds [-Werror=array-bounds] tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; ^~ net/ipv4/tcp_output.c:2207:40: error: array subscript is below array bounds [-Werror=array-bounds] tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~ I have opened a gcc bug for this, but distros have already shipped compilers with this problem, and it's not clear yet whether there is a way for gcc to avoid the warning. As the problem is related to the bitfield access, this introduces a temporary variable to store the old enum value. I did not notice this warning earlier, since UBSAN is disabled when building with COMPILE_TEST, and that was always turned on in both allmodconfig and randconfig tests. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81601 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4e985dea1dd2..2f1588bf73da 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { const u32 now = tcp_jiffies32; + enum tcp_chrono old = tp->chrono_type; - if (tp->chrono_type > TCP_CHRONO_UNSPEC) - tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; + if (old > TCP_CHRONO_UNSPEC) + tp->chrono_stat[old - 1] += now - tp->chrono_start; tp->chrono_start = now; tp->chrono_type = new; } -- cgit v1.2.3 From 71ed7ee35ad2c5300f4b51634185a0193b4fb0fa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 28 Jul 2017 23:27:44 +0300 Subject: ipv4: fib: Fix NULL pointer deref during fib_sync_down_dev() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michał reported a NULL pointer deref during fib_sync_down_dev() when unregistering a netdevice. The problem is that we don't check for 'in_dev' being NULL, which can happen in very specific cases. Usually routes are flushed upon NETDEV_DOWN sent in either the netdev or the inetaddr notification chains. However, if an interface isn't configured with any IP address, then it's possible for host routes to be flushed following NETDEV_UNREGISTER, after NULLing dev->ip_ptr in inetdev_destroy(). To reproduce: $ ip link add type dummy $ ip route add local 1.1.1.0/24 dev dummy0 $ ip link del dev dummy0 Fix this by checking for the presence of 'in_dev' before referencing it. Fixes: 982acb97560c ("ipv4: fib: Notify about nexthop status changes") Signed-off-by: Ido Schimmel Reported-by: Michał Mirosław Tested-by: Michał Mirosław Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 222100103808..b8d18171cca3 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1452,7 +1452,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, &info.info); case FIB_EVENT_NH_DEL: - if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) || (fib_nh->nh_flags & RTNH_F_DEAD)) return call_fib_notifiers(dev_net(fib_nh->nh_dev), -- cgit v1.2.3 From cb891fa6a1d5f52c5f5c07b6f7f4c6d65ea55fc0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 31 Jul 2017 16:52:36 +0200 Subject: udp6: fix jumbogram reception Since commit 67a51780aebb ("ipv6: udp: leverage scratch area helpers") udp6_recvmsg() read the skb len from the scratch area, to avoid a cache miss. But the UDP6 rx path support RFC 2675 UDPv6 jumbograms, and their length exceeds the 16 bits available in the scratch area. As a side effect the length returned by recvmsg() is: % (1<<16) This commit addresses the issue allocating one more bit in the IP6CB flags field and setting it for incoming jumbograms. Such field is still in the first cacheline, so at recvmsg() time we can check it and fallback to access skb->len if required, without a measurable overhead. Fixes: 67a51780aebb ("ipv6: udp: leverage scratch area helpers") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/ipv6.h | 6 ++++++ net/ipv6/exthdrs.c | 1 + net/ipv6/udp.c | 11 ++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e1b442996f81..474d6bbc158c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -128,6 +128,7 @@ struct inet6_skb_parm { #define IP6SKB_FRAGMENTED 16 #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 +#define IP6SKB_JUMBOGRAM 128 }; #if defined(CONFIG_NET_L3_MASTER_DEV) @@ -152,6 +153,11 @@ static inline int inet6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +static inline bool inet6_is_jumbogram(const struct sk_buff *skb) +{ + return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); +} + /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4996d734f1d2..3cec529c6113 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; + IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM; return true; drop: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 98fe4560e24c..578142b7ca3e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -328,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif +/* do not use the scratch area len for jumbogram: their length execeeds the + * scratch area space; note that the IP6CB flags is still in the first + * cacheline, so checking for jumbograms is cheap + */ +static int udp6_skb_len(struct sk_buff *skb) +{ + return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -358,7 +367,7 @@ try_again: if (!skb) return err; - ulen = udp_skb_len(skb); + ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; -- cgit v1.2.3