From 640b2b107cec23c754214b62a811465fa8f9257f Mon Sep 17 00:00:00 2001
From: Jiri Benc
Date: Tue, 2 Jun 2015 14:36:34 +0200
Subject: openvswitch: disable LRO

Currently, openvswitch tries to disable LRO from the user space. This does
not work correctly when the device added is a vlan interface, though.

Instead of dealing with possibly complex stacked cross name space relations
in the user space, do the same as bridging does and call dev_disable_lro in
the kernel.

Signed-off-by: Jiri Benc
Acked-by: Flavio Leitner
Acked-by: Pravin B Shelar
Signed-off-by: David S. Miller
---
 net/openvswitch/vport-netdev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4776282c6417..33e6d6e2908f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 	if (err)
 		goto error_master_upper_dev_unlink;
 
+	dev_disable_lro(netdev_vport->dev);
 	dev_set_promiscuity(netdev_vport->dev, 1);
 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 	rtnl_unlock();
--
cgit v1.2.3


From 6e540309326188f769e03bb4c6dd8ff6752930c2 Mon Sep 17 00:00:00 2001
From: Shawn Bohrer
Date: Wed, 3 Jun 2015 16:27:38 -0500
Subject: ipv4/udp: Verify multicast group is ours in upd_v4_early_demux()

421b3885bf6d56391297844f43fb7154a6396e12 "udp: ipv4: Add udp early demux"
introduced a regression that allowed sockets bound to INADDR_ANY to receive
packets from multicast groups that the socket had not joined. For example a
socket that had joined 224.168.2.9 could also receive packets from
225.168.2.9 despite not having joined that group if ip_early_demux is
enabled.

Fix this by calling ip_check_mc_rcu() in udp_v4_early_demux() to verify
that the multicast packet is indeed ours.

Signed-off-by: Shawn Bohrer
Reported-by: Yurij M. Plotnikov
Signed-off-by: David S. Miller
---
 net/ipv4/udp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1c92ea67baef..83aa604f9273 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1960,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int ours;
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1969,14 +1971,24 @@
 	uh = udp_hdr(skb);
 
 	if (skb->pkt_type == PACKET_BROADCAST ||
-	    skb->pkt_type == PACKET_MULTICAST)
+	    skb->pkt_type == PACKET_MULTICAST) {
+		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+		if (!in_dev)
+			return;
+
+		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+				       iph->protocol);
+		if (!ours)
+			return;
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
-	else if (skb->pkt_type == PACKET_HOST)
+	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 					     uh->source, iph->saddr, dif);
-	else
+	} else {
 		return;
+	}
 
 	if (!sk)
 		return;
--
cgit v1.2.3
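
A note on the udp early-demux fix above: the regression is visible from
ordinary user space. A receiver bound to INADDR_ANY that joins only
224.168.2.9 could, with ip_early_demux enabled, also be handed datagrams
sent to a different group on the same port. Below is a minimal receiver
sketch of that scenario; the port number and the bare-bones error handling
are arbitrary choices for illustration, not values taken from the commit.

/* Minimal multicast receiver: binds INADDR_ANY and joins 224.168.2.9 only.
 * With the regression, datagrams addressed to other groups on the same
 * port could also be delivered to this socket. Illustration only. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        struct sockaddr_in addr;
        struct ip_mreq mreq;
        char buf[2048];

        if (fd < 0)
                return 1;

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);       /* bound to INADDR_ANY */
        addr.sin_port = htons(5000);
        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                return 1;

        memset(&mreq, 0, sizeof(mreq));
        mreq.imr_multiaddr.s_addr = inet_addr("224.168.2.9");
        mreq.imr_interface.s_addr = htonl(INADDR_ANY);
        if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0)
                return 1;

        /* Only datagrams sent to 224.168.2.9:5000 should show up here. */
        while (recv(fd, buf, sizeof(buf), 0) > 0)
                printf("got a datagram\n");

        close(fd);
        return 0;
}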
From 1d7c49037b12016e7056b9f2c990380e2187e766 Mon Sep 17 00:00:00 2001
From: Wilson Kok
Date: Fri, 5 Jun 2015 00:52:57 -0700
Subject: bridge: use _bh spinlock variant for br_fdb_update to avoid lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to use spin_lock_bh because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in a
loop. This happens because br_fdb_cleanup() is called from timer/softirq
context.

These locks were _bh before commit f8ae737deea1 ("[BRIDGE]: forwarding
remove unneeded preempt and bh diasables") and at the time that commit was
correct because br_fdb_update() couldn't be called from process context,
but that changed after commit: 292d1398983f ("bridge: add NTF_USE support")

Signed-off-by: Wilson Kok
Signed-off-by: Nikolay Aleksandrov
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f9..7eacc8ae9779 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock(&br->hash_lock);
+		spin_lock_bh(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock(&br->hash_lock);
+		spin_unlock_bh(&br->hash_lock);
 	}
 }
--
cgit v1.2.3
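
To spell out the lockup the _bh variants avoid: if process context takes
hash_lock with plain spin_lock() and the FDB cleanup timer then fires as a
softirq on the same CPU and contends for the same lock, the softirq spins
on a lock its own CPU already holds and never completes. The sketch below
is schematic only; the lock and function names are stand-ins, not the
bridge code itself.

/* Schematic sketch of the two contending paths -- stand-in names only. */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(hash_lock);

/* Timer/softirq path, analogous to br_fdb_cleanup(). */
static void cleanup_from_softirq(void)
{
        spin_lock(&hash_lock);          /* spins forever if this CPU already holds it */
        /* ... age out stale entries ... */
        spin_unlock(&hash_lock);
}

/* Process-context path, analogous to br_fdb_update() reached via NTF_USE. */
static void update_from_process_context(void)
{
        spin_lock_bh(&hash_lock);       /* _bh keeps softirqs off this CPU while held */
        /* ... insert or refresh an entry ... */
        spin_unlock_bh(&hash_lock);
}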
From 25cc8f0763c972911b1a65099cd10d9f8a45a7b0 Mon Sep 17 00:00:00 2001
From: Robert Shearman
Date: Fri, 5 Jun 2015 18:54:45 +0100
Subject: mpls: fix possible use after free of device

The mpls device is used in an RCU read context without a lock being held.
As the memory is freed without waiting for the RCU grace period to elapse,
the freed memory could still be in use.

Address this by using kfree_rcu to free the memory for the mpls device
after the RCU grace period has elapsed.

Fixes: 03c57747a702 ("mpls: Per-device MPLS state")
Signed-off-by: Robert Shearman
Acked-by: "Eric W. Biederman"
Signed-off-by: David S. Miller
---
 net/mpls/af_mpls.c  | 2 +-
 net/mpls/internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b3f732269e4..bff427f31924 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev)
 
 	RCU_INIT_POINTER(dev->mpls_ptr, NULL);
 
-	kfree(mdev);
+	kfree_rcu(mdev, rcu);
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index b064c345042c..8cabeb5a1cb9 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -16,6 +16,7 @@ struct mpls_dev {
 	int			input_enabled;
 
 	struct ctl_table_header *sysctl;
+	struct rcu_head		rcu;
 };
 
 struct sk_buff;
--
cgit v1.2.3
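
The mpls fix is an instance of the usual RCU deferred-free pattern: embed a
struct rcu_head in the object, unpublish the RCU-visible pointer, then let
kfree_rcu() release the memory only after a grace period, so readers inside
rcu_read_lock() sections never touch freed memory. A generic sketch of the
pattern follows; the structure and variable names are hypothetical, not the
mpls ones.

/* Generic kfree_rcu pattern -- hypothetical names, not the mpls code. */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_state {
        int data;
        struct rcu_head rcu;            /* gives kfree_rcu() somewhere to queue */
};

static struct my_state __rcu *global_state;

static void publisher_teardown(void)
{
        struct my_state *s = rcu_dereference_protected(global_state, 1);

        if (!s)
                return;
        RCU_INIT_POINTER(global_state, NULL);   /* unpublish first */
        kfree_rcu(s, rcu);                      /* freed after the grace period */
}

static int reader(void)
{
        struct my_state *s;
        int val = 0;

        rcu_read_lock();
        s = rcu_dereference(global_state);
        if (s)
                val = s->data;          /* safe: memory outlives this read section */
        rcu_read_unlock();
        return val;
}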
From 7ff46e79fb7df5b09c46c36857929fdf039f8b31 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sun, 7 Jun 2015 19:43:47 -0700
Subject: Revert "bridge: use _bh spinlock variant for br_fdb_update to avoid lockup"

This reverts commit 1d7c49037b12016e7056b9f2c990380e2187e766.

Nikolay Aleksandrov has a better version of this fix.

Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 7eacc8ae9779..e0670d7054f9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock_bh(&br->hash_lock);
+		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock_bh(&br->hash_lock);
+		spin_unlock(&br->hash_lock);
 	}
 }
--
cgit v1.2.3


From c4c832f89dc468cf11dc0dd17206bace44526651 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov
Date: Sat, 6 Jun 2015 06:49:00 -0700
Subject: bridge: disable softirqs around br_fdb_update to avoid lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to disable softirqs because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in a
loop. This happens because br_fdb_cleanup() is called from timer/softirq
context.

The spin locks in br_fdb_update were _bh before commit f8ae737deea1
("[BRIDGE]: forwarding remove unneeded preempt and bh diasables") and at
the time that commit was correct because br_fdb_update() couldn't be called
from process context, but that changed after commit: 292d1398983f ("bridge:
add NTF_USE support")

Using local_bh_disable/enable around br_fdb_update() allows us to keep
using the spin_lock/unlock in br_fdb_update for the fast-path.

Signed-off-by: Nikolay Aleksandrov
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller
---
 net/bridge/br_fdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f9..659fb96672e4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
 	int err = 0;
 
 	if (ndm->ndm_flags & NTF_USE) {
+		local_bh_disable();
 		rcu_read_lock();
 		br_fdb_update(p->br, p, addr, vid, true);
 		rcu_read_unlock();
+		local_bh_enable();
 	} else {
 		spin_lock_bh(&p->br->hash_lock);
 		err = fdb_add_entry(p, addr, ndm->ndm_state,
--
cgit v1.2.3


From 27e41fcfa6b326ad44eee7e0b1930d080b270895 Mon Sep 17 00:00:00 2001
From: Robert Shearman
Date: Fri, 5 Jun 2015 18:51:54 +0100
Subject: ipv6: fix possible use after free of dev stats

The memory pointed to by idev->stats.icmpv6msgdev, idev->stats.icmpv6dev
and idev->stats.ipv6 can each be used in an RCU read context without taking
a reference on idev. For example, through IP6_*_STATS_* calls in ip6_rcv.
These memory blocks are freed without waiting for an RCU grace period to
elapse. This could lead to the memory being written to after it has been
freed.

Fix this by using call_rcu to free the memory used for stats, as well as
idev after an RCU grace period has elapsed.

Signed-off-by: Robert Shearman
Acked-by: Hannes Frederic Sowa
Signed-off-by: David S. Miller
---
 net/ipv6/addrconf_core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d873ceea86e6..ca09bf49ac68 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev)
 	free_percpu(idev->stats.ipv6);
 }
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+
+	snmp6_free_dev(idev);
+	kfree(idev);
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		pr_warn("Freeing alive inet6 device %p\n", idev);
 		return;
 	}
-	snmp6_free_dev(idev);
-	kfree_rcu(idev, rcu);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 EXPORT_SYMBOL(in6_dev_finish_destroy);
--
cgit v1.2.3
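
Unlike the mpls case earlier in this series, the ipv6 stats teardown cannot
use kfree_rcu() alone: the per-cpu stats blocks hanging off the device also
have to stay allocated until the grace period ends, and kfree_rcu() can
only defer the one kfree. call_rcu() with a callback defers the whole
teardown. A generic sketch of that pattern, again with hypothetical names
rather than the ipv6 structures:

/* Generic call_rcu teardown pattern -- hypothetical names, not the ipv6 code. */
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_dev {
        u64 __percpu *stats;            /* extra allocation owned by the object */
        struct rcu_head rcu;
};

static void my_dev_destroy_rcu(struct rcu_head *head)
{
        struct my_dev *d = container_of(head, struct my_dev, rcu);

        free_percpu(d->stats);          /* safe now: no RCU reader can still see d */
        kfree(d);
}

static void my_dev_destroy(struct my_dev *d)
{
        /* Defer both the stats free and the kfree past the grace period. */
        call_rcu(&d->rcu, my_dev_destroy_rcu);
}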
From 0243508edd317ff1fa63b495643a7c192fbfcd92 Mon Sep 17 00:00:00 2001
From: Josh Hunt
Date: Mon, 8 Jun 2015 12:00:59 -0400
Subject: ipv6: Fix protocol resubmission

UDP encapsulation is broken on IPv6. This is because the logic to resubmit
the nexthdr is inverted, checking for a ret value > 0 instead of < 0. Also,
the resubmit label is in the wrong position since we already get the
nexthdr value when performing decapsulation. In addition the skb pull is no
longer necessary either.

This changes the return value check to look for < 0, using it for the
nexthdr on the next iteration, and moves the resubmit label to the proper
location. With these changes the v6 code now matches what we do in the v4
ip input code wrt resubmitting when decapsulating.

Signed-off-by: Josh Hunt
Acked-by: "Tom Herbert"
Signed-off-by: David S. Miller
---
 net/ipv6/ip6_input.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464eba5ef..41a73da371a9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
 	 */
 
 	rcu_read_lock();
-resubmit:
 	idev = ip6_dst_idev(skb_dst(skb));
 	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb_network_header(skb)[nhoff];
 
+resubmit:
 	raw = raw6_local_deliver(skb, nexthdr);
 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot) {
@@ -246,10 +246,12 @@ resubmit:
 			goto discard;
 
 		ret = ipprot->handler(skb);
-		if (ret > 0)
+		if (ret < 0) {
+			nexthdr = -ret;
 			goto resubmit;
-		else if (ret == 0)
+		} else if (ret == 0) {
 			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+		}
 	} else {
 		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
--
cgit v1.2.3


From bbbf2df0039d31c6a0a9708ce4fe220a54bd5379 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Mon, 8 Jun 2015 11:53:08 -0400
Subject: net: replace last open coded skb_orphan_frags with function call

Commit 70008aa50e92 ("skbuff: convert to skb_orphan_frags") replaced open
coded tests of SKBTX_DEV_ZEROCOPY and skb_copy_ubufs with calls to helper
function skb_orphan_frags. Apply that to the last remaining open coded
site.

Signed-off-by: Willem de Bruijn
Acked-by: Michael S. Tsirkin
Signed-off-by: David S. Miller
---
 net/core/dev.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2c1c67fad64d..aa82f9ab6a36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
+	if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+	    unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
--
cgit v1.2.3
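
For reference, the skb_orphan_frags() helper wraps the same
SKBTX_DEV_ZEROCOPY test and skb_copy_ubufs() call that the removed lines
open-coded. Roughly, and not quoted verbatim from include/linux/skbuff.h,
it behaves like the sketch below, which assumes the declarations from that
header are available.

/* Approximate behaviour of skb_orphan_frags() -- sketch, not the real header. */
static inline int skb_orphan_frags_sketch(struct sk_buff *skb, gfp_t gfp_mask)
{
        /* No userspace-pinned (zerocopy) fragments: nothing to do. */
        if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
                return 0;

        /* Otherwise copy the user buffers so the skb owns all of its data. */
        return skb_copy_ubufs(skb, gfp_mask);
}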