From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Feb 2010 16:36:25 -0500 Subject: rfkill: Add support for KEY_RFKILL Add support for handling KEY_RFKILL in the rfkill input module. This simply toggles the state of all rfkill devices. The comment in rfkill.h is also updated to reflect that RFKILL_TYPE_ALL may be used inside the kernel. Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: John W. Linville --- include/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 97059d08a626..4f82326eb294 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -29,7 +29,7 @@ /** * enum rfkill_type - type of rfkill switch. * - * @RFKILL_TYPE_ALL: toggles all switches (userspace only) + * @RFKILL_TYPE_ALL: toggles all switches (requests only - not a switch type) * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. * @RFKILL_TYPE_UWB: switch is on a ultra wideband device. -- cgit v1.2.3 From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Mar 2010 02:51:56 +0000 Subject: ipsec: Fix bogus bundle flowi When I merged the bundle creation code, I introduced a bogus flowi value in the bundle. Instead of being taken from the caller, it was set to the flow in the route object, which is totally different. The end result is that the bundles we created never match, and we instead end up with an ever growing bundle list. Thanks to Jamal for finding this problem. Reported-by: Jamal Hadi Salim Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a7df3275b860..d74e080ba6c9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -275,7 +275,8 @@ struct xfrm_policy_afinfo { struct dst_entry *dst, int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, - struct net_device *dev); + struct net_device *dev, + struct flowi *fl); }; extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696f5fc5..ae181651c75a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8316e7..843e066649cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int 
xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; -- cgit v1.2.3 From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 1 Mar 2010 14:42:57 +0530 Subject: mac80211: Fix HT rate control configuration Handling HT configuration changes involved setting the channel with the new HT parameters and then issuing a rate_update() notification to the driver. This behavior changed after the off-channel changes. Now, the channel is not updated with the new HT params in enable_ht() - instead, it is now done when the scan work terminates. This results in the driver depending on stale information, defaulting to non-HT mode always. Fix this by passing the new channel type to the driver. Cc: stable@kernel.org Signed-off-by: Sujith Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/rc.c | 6 +++--- include/net/mac80211.h | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/rate.h | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index ac34a055c713..0e79e58cf4c9 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1323,7 +1323,7 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, - u32 changed) + u32 changed, enum nl80211_channel_type oper_chan_type) { struct ath_softc *sc = priv; struct ath_rate_priv *ath_rc_priv = priv_sta; @@ -1340,8 +1340,8 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, if (sc->sc_ah->opmode != NL80211_IFTYPE_STATION) return; - if (sc->hw->conf.channel_type == NL80211_CHAN_HT40MINUS || - sc->hw->conf.channel_type == NL80211_CHAN_HT40PLUS) + if (oper_chan_type == NL80211_CHAN_HT40MINUS || + oper_chan_type == NL80211_CHAN_HT40PLUS) oper_cw40 = true; oper_sgi40 = (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40) ? 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 80eb7cc42ce9..45d7d44d7cbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2426,7 +2426,8 @@ struct rate_control_ops { struct ieee80211_sta *sta, void *priv_sta); void (*rate_update)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, - void *priv_sta, u32 changed); + void *priv_sta, u32 changed, + enum nl80211_channel_type oper_chan_type); void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a268761e4c5..0ab284c32135 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bca96d4..065a96190e32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, -- cgit v1.2.3 From c839d30a41dd92eb32d7fcfa2b4e99042fc64bf2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 3 Mar 2010 04:46:50 +0000 Subject: net: add scheduler sync 
hint to tcp_prequeue(). Decreases the odds that the wakee will suffer from frequent cache misses. Signed-off-by: Mike Galbraith Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 56f0aec40ed6..75be5a28815d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, -- cgit v1.2.3 From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:40 +0000 Subject: net: add limit for socket backlog We got system OOM while running some UDP netperf testing on the loopback device. The test case has multiple senders sending streams of UDP packets to a single receiver via loopback on the local host. Of course, the receiver is not able to handle all the packets in time. But we surprisingly found that these packets were not discarded due to the receiver's sk->sk_rcvbuf limit. Instead, they are kept queuing to sk->sk_backlog and finally ate up all the memory. We believe this is a security hole through which a non-privileged user can crash the system. The root cause for this problem is, when the receiver is doing __release_sock() (i.e. after userspace recv, kernel udp_recvmsg -> skb_free_datagram_locked -> release_sock), it moves skbs from backlog to sk_receive_queue with the softirq enabled. In the above case, multiple busy senders will almost make it an endless loop. The skbs in the backlog end up eating all the system memory. The issue is not only for UDP. Any protocol using the socket backlog is potentially affected. 
The patch adds limit for socket backlog so that the backlog size cannot be expanded endlessly. Reported-by: Alex Shi Cc: David Miller Cc: Arnaldo Carvalho de Melo Cc: Alexey Kuznetsov Cc: Patrick McHardy Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Signed-off-by: Zhu Yi Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++++++- net/core/sock.c | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 6cb1676e409a..2516d76f043c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -253,6 +253,8 @@ struct sock { struct { struct sk_buff *head; struct sk_buff *tail; + int len; + int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -589,7 +591,7 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -/* The per-socket spinlock must be held here. */ +/* OOB backlog add */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { @@ -601,6 +603,17 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* The per-socket spinlock must be held here. 
*/ +static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +{ + if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + return -ENOBUFS; + + sk_add_backlog(sk, skb); + sk->sk_backlog.len += skb->truesize; + return 0; +} + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { return sk->sk_backlog_rcv(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a762ff..6e22dc973d23 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. 
+ */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); -- cgit v1.2.3 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/dccp/minisocks.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 4 ++-- net/tipc/socket.c | 2 +- net/x25/x25_dev.c | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2516d76f043c..170353dd9570 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk) } /* OOB backlog add */ -static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { sk->sk_backlog.head = sk->sk_backlog.tail = skb; @@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. 
*/ -static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 6e22dc973d23..61a65a2e0455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a063141..0d508c359fa9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4baf1943b1bd..1915f7dc30e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,7 +1682,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5dda80..4199bc6915c5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. 
*/ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7eb47f338d4..7af756d0f931 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto drop; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4ea9d5cbfaa..2c378b1bd5cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,7 +1740,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 64804912b093..3c0c9c755c92 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog_limited(sk, skb1)) { + else if (sk_add_backlog(sk, skb1)) { kfree_skb(skb1); bh_unlock_sock(sk); goto drop; @@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { atomic_inc(&sk->sk_drops); bh_unlock_sock(sk); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780512e8..86d6985b9d49 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git 
a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0539ffdb272..a12144da7974 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. 
* This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 22bfbc33a8ac..4b235fc1c70f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog_limited(sk, buf)) + if (sk_add_backlog(sk, buf)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a9da0dc26f4f..52e304212241 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog_limited(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 3 Mar 2010 22:51:50 +0000 Subject: ethtool: Add direct access to ops->get_sset_count This patch is an alternative approach for accessing string counts, vs. the drvinfo indirect approach. This way the drvinfo space doesn't run out, and we don't break ABI later. Signed-off-by: Jeff Garzik Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 17 +++++++++--- net/core/ethtool.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index cca1c3de140d..f6f961fefbe5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -253,6 +253,17 @@ struct ethtool_gstrings { __u8 data[0]; }; +struct ethtool_sset_info { + __u32 cmd; /* ETHTOOL_GSSET_INFO */ + __u32 reserved; + __u64 sset_mask; /* input: each bit selects an sset to query */ + /* output: each bit a returned sset */ + __u32 data[0]; /* ETH_SS_xxx count, in order, based on bits + in sset_mask. One bit implies one + __u32, two bits implies two + __u32's, etc. */ +}; + enum ethtool_test_flags { ETH_TEST_FL_OFFLINE = (1 << 0), /* online / offline */ ETH_TEST_FL_FAILED = (1 << 1), /* test passed / failed */ @@ -606,9 +617,9 @@ struct ethtool_ops { #define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ #define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ #define ETHTOOL_RESET 0x00000034 /* Reset hardware */ - -#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ -#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ +#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f82185ec4..70075c47ada8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * consider using ETHTOOL_GSSET_INFO 
instead + */ if (ops->get_sset_count) { int rc; @@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + for (i = 0; i < 64; i++) + if (sset_mask & (1ULL << i)) + n_bits++; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + } + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + /* * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() */ @@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon 
Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 4 Mar 2010 08:21:53 +0000 Subject: ethtool: Add direct access to ops->get_sset_count On 03/04/2010 09:26 AM, Ben Hutchings wrote: > On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote: >> From: Jeff Garzik >> >> This patch is an alternative approach for accessing string >> counts, vs. the drvinfo indirect approach. This way the drvinfo >> space doesn't run out, and we don't break ABI later. > [...] >> --- a/net/core/ethtool.c >> +++ b/net/core/ethtool.c >> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use >> info.cmd = ETHTOOL_GDRVINFO; >> ops->get_drvinfo(dev,&info); >> >> + /* >> + * this method of obtaining string set info is deprecated; >> + * consider using ETHTOOL_GSSET_INFO instead >> + */ > > This comment belongs on the interface (ethtool.h) not the > implementation. Debatable -- the current comment is located at the callsite of ops->get_sset_count(), which is where an implementor might think to add a new call. Not all the numeric fields in ethtool_drvinfo are obtained from ->get_sset_count(). Hence the "some" in the attached patch to include/linux/ethtool.h, addressing your comment. > [...] >> +static noinline int ethtool_get_sset_info(struct net_device *dev, >> + void __user *useraddr) >> +{ > [...] >> + /* calculate size of return buffer */ >> + for (i = 0; i< 64; i++) >> + if (sset_mask& (1ULL<< i)) >> + n_bits++; > [...] > > We have a function for this: > > n_bits = hweight64(sset_mask); Agreed. I've attached a follow-up patch, which should enable my/Jeff's kernel patch to be applied, followed by this one. Signed-off-by: Jeff Garzik Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 7 +++++++ net/core/ethtool.c | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f6f961fefbe5..b33f316bb92e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -61,6 +61,13 @@ struct ethtool_drvinfo { /* For PCI devices, use pci_name(pci_dev). */ char reserved1[32]; char reserved2[12]; + /* + * Some struct members below are filled in + * using ops->get_sset_count(). Obtaining + * this info from ethtool_drvinfo is now + * deprecated; Use ETHTOOL_GSSET_INFO + * instead. + */ __u32 n_priv_flags; /* number of flags valid in ETHTOOL_GPFLAGS */ __u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */ __u32 testinfo_len; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 70075c47ada8..33d2ded50f84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use /* * this method of obtaining string set info is deprecated; - * consider using ETHTOOL_GSSET_INFO instead + * Use ETHTOOL_GSSET_INFO instead. */ if (ops->get_sset_count) { int rc; @@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev, return 0; /* calculate size of return buffer */ - for (i = 0; i < 64; i++) - if (sset_mask & (1ULL << i)) - n_bits++; + n_bits = hweight64(sset_mask); memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GSSET_INFO; -- cgit v1.2.3 From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 7 Mar 2010 00:14:44 +0000 Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and RT6_LOOKUP_F_xxx. 
IPV6_PREFER_SRC_xxx definitions: | #define IPV6_PREFER_SRC_TMP 0x0001 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | #define IPV6_PREFER_SRC_COA 0x0004 RT6_LOOKUP_F_xxx definitions: | #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 | #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 | #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 So, we can translate between these two groups by shift operation instead of multiple 'if's. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 18 ++++++++++++++++++ net/ipv6/fib6_rules.c | 11 ++--------- net/ipv6/route.c | 11 ++--------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4a808de7c0f6..68f67836e146 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -37,6 +37,24 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* + * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate + * between IPV6_ADDR_PREFERENCES socket option values + * IPV6_PREFER_SRC_TMP = 0x1 + * IPV6_PREFER_SRC_PUBLIC = 0x2 + * IPV6_PREFER_SRC_COA = 0x4 + * and above RT6_LOOKUP_F_SRCPREF_xxx flags. + */ +static inline int rt6_srcprefs2flags(unsigned int srcprefs) +{ + /* No need to bitmask because srcprefs have only 3 bits. 
*/ + return srcprefs << 3; +} + +static inline unsigned int rt6_flags2srcprefs(int flags) +{ + return (flags >> 3) & 7; +} extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b9dfd6..5e463c43fcc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e97f22..52cd3eff31dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } -- cgit v1.2.3 From 4045635318538d3ddd2007720412fdc4b08f6a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Sun, 7 Mar 2010 16:21:39 +0000 Subject: net: add __must_check to sk_add_backlog Add the "__must_check" tag to sk_add_backlog() so that any failure to check and drop packets will be 
warned about. Signed-off-by: Zhu Yi Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 170353dd9570..092b0551e77f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -604,7 +604,7 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; -- cgit v1.2.3 From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Mar 2010 23:21:57 +0000 Subject: tcp: Add SNMP counters for backlog and min_ttl drops Commit 6b03a53a (tcp: use limited socket backlog) added the possibility of dropping frames when backlog queue is full. Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the possibility of dropping frames when TTL is under a given limit. This patch adds new SNMP MIB entries, named TCPBacklogDrop and TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop" TCPBacklogDrop: 0 TCPMinTTLDrop: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 3 ++- 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e28f5a0182e8..4435d1084755 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -225,6 +225,8 @@ enum LINUX_MIB_SACKSHIFTED, LINUX_MIB_SACKMERGED, LINUX_MIB_SACKSHIFTFALLBACK, + LINUX_MIB_TCPBACKLOGDROP, + LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed2307370..4f1f337f4337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1915f7dc30e6..8d51d39ad1bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; + } process: if (sk->sk_state == TCP_TIME_WAIT) @@ -1682,8 +1684,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c378b1bd5cf..9b6dbba80d31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,9 @@ process: if (!tcp_prequeue(sk, 
skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); -- cgit v1.2.3 From 2b4c32972b9bcfee29d5e2c1b6f261dda5ef2a21 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Tue, 9 Mar 2010 16:47:52 +0000 Subject: ipv6 ip6_tunnel: eliminate unused recursion field from ip6_tnl{}. Commit a43912ab19... ("tunnel: eliminate recursion field") eliminated use of recursion field from tunnel structures, but its definition still exists in ip6_tnl{}. Let's remove that unused field. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 83b4e008b16d..fbf9d1cda27b 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -15,7 +15,6 @@ struct ip6_tnl { struct ip6_tnl *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ - int recursion; /* depth of hard_start_xmit recursion */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_entry *dst_cache; /* cached dst */ -- cgit v1.2.3