From 00fe1ae91e0d69e52e8212d23cd3ecc74a7259a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 16:24:46 +0200 Subject: netfilter: xt_rateest: fix xt_rateest_mt_checkentry() commit 4a5a5c73b7cfee (slightly better error reporting) added some useless code in xt_rateest_mt_checkentry(). Fix this so that different error codes can really be returned. Signed-off-by: Eric Dumazet CC: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a083184d8e..ed0db15ab00e 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) -- cgit v1.2.3 From 91c66c6893a3e2bb8a88a30cb76007d5d49d32c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 29 Jul 2011 16:38:49 +0200 Subject: netfilter: ip_queue: Fix small leak in ipq_build_packet_message() ipq_build_packet_message() in net/ipv4/netfilter/ip_queue.c and net/ipv6/netfilter/ip6_queue.c contain a small potential mem leak as far as I can tell. We allocate memory for 'skb' with alloc_skb() annd then call nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); NLMSG_PUT is a macro NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) that expands to NLMSG_NEW, which is also a macro which expands to: NLMSG_NEW(skb, pid, seq, type, len, flags) \ ({ if (unlikely(skb_tailroom(skb) < (int)NLMSG_SPACE(len))) \ goto nlmsg_failure; \ __nlmsg_put(skb, pid, seq, type, len, flags); }) If we take the true branch of the 'if' statement and 'goto nlmsg_failure', then we'll, at that point, return from ipq_build_packet_message() without having assigned 'skb' to anything and we'll leak the memory we allocated for it when it goes out of scope. Fix this by placing a 'kfree(skb)' at 'nlmsg_failure'. I admit that I do not know how likely this to actually happen or even if there's something that guarantees that it will never happen - I'm not that familiar with this code, but if that is so, I've not been able to spot it. Signed-off-by: Jesper Juhl Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d963918..48f7d5b4ff37 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 249394863284..87b243a25afa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; -- cgit v1.2.3 From 9823d9ff483af4ce8804a9eb69600ca739cd1f58 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Fri, 29 Jul 2011 16:40:30 +0200 Subject: netfilter: ebtables: fix ebtables build dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The configuration of ebtables shouldn't depend on CONFIG_BRIDGE_NETFILTER, only on CONFIG_NETFILTER. Reported-by: Sébastien Laveze Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73eb06c6..a9aff9c7d027 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification -- cgit v1.2.3 From 5185352c163a72cf969b2fbbfb89801b398896fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 14:48:11 -0700 Subject: libceph: fix msgpool There were several problems here: 1- we weren't tagging allocations with the pool, so they were never returned to the pool. 2- msgpool_put didn't add back to the mempool, even it were called. 3- msgpool_release didn't clear the pool pointer, so it would have looped had #1 not been broken. These may or may not have been responsible for #1136 or #1381 (BUG due to non-empty mempool on umount). I can't seem to trigger the crash now using the method I was using before. Signed-off-by: Sage Weil --- net/ceph/msgpool.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index d5f2d97ac05c..1f4cb30a42c5 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -7,27 +7,37 @@ #include -static void *alloc_fn(gfp_t gfp_mask, void *arg) +static void *msgpool_alloc(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - void *p; + struct ceph_msg *msg; - p = ceph_msg_new(0, pool->front_len, gfp_mask); - if (!p) - pr_err("msgpool %s alloc failed\n", pool->name); - return p; + msg = ceph_msg_new(0, pool->front_len, gfp_mask); + if (!msg) { + dout("msgpool_alloc %s failed\n", pool->name); + } else { + dout("msgpool_alloc %s %p\n", pool->name, msg); + msg->pool = pool; + } + return msg; } -static void free_fn(void *element, void *arg) +static void msgpool_free(void *element, void *arg) { - ceph_msg_put(element); + struct ceph_msgpool *pool = arg; + struct ceph_msg *msg = element; + + dout("msgpool_release %s %p\n", pool->name, msg); + msg->pool = NULL; + ceph_msg_put(msg); } int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking, const char *name) { + dout("msgpool %s init\n", name); pool->front_len = front_len; - pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; pool->name = name; @@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool, void ceph_msgpool_destroy(struct ceph_msgpool *pool) { + dout("msgpool %s destroy\n", pool->name); mempool_destroy(pool->pool); } struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { + struct ceph_msg *msg; + if (front_len > pool->front_len) { - pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); WARN_ON(1); @@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, return ceph_msg_new(0, front_len, GFP_NOFS); } - return mempool_alloc(pool->pool, GFP_NOFS); + msg = mempool_alloc(pool->pool, GFP_NOFS); + dout("msgpool_get %s %p\n", pool->name, msg); + return msg; } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { + dout("msgpool_put %s %p\n", pool->name, msg); + /* reset msg front_len; user may have changed it */ msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); kref_init(&msg->kref); /* retake single ref */ + mempool_free(msg, pool->pool); } -- cgit v1.2.3 From 8475e2336cf80ba6e7b27715b4b3214d73c211ab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Aug 2011 17:22:45 +0300 Subject: Bluetooth: unlock if allocation fails in hci_blacklist_add() There was a small typo here so we never actually hit the goto which would call hci_dev_unlock_bh(). Signed-off-by: Dan Carpenter Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ec0bc3f60f2e..fca62dcd7f1b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1327,7 +1327,7 @@ int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr) entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); if (!entry) { - return -ENOMEM; + err = -ENOMEM; goto err; } -- cgit v1.2.3 From e5842cdb0f4f2c68f6acd39e286e5d10d8c073e8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:35 -0400 Subject: Bluetooth: rfcomm: Remove unnecessary krfcommd event Removed superfluous event handling which was used to signal that the rfcomm kthread had been woken. This appears to have been used to prevent lost wakeups. Correctly ordering when the task state is set to TASK_INTERRUPTIBLE is sufficient to prevent lost wakeups. To prevent wakeups which occurred prior to initially setting TASK_INTERRUPTIBLE from being lost, the main work of the thread loop - rfcomm_process_sessions() - is performed prior to sleeping. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 5759bb7054f7..5ba3f6df665c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -62,7 +62,6 @@ static DEFINE_MUTEX(rfcomm_mutex); #define rfcomm_lock() mutex_lock(&rfcomm_mutex) #define rfcomm_unlock() mutex_unlock(&rfcomm_mutex) -static unsigned long rfcomm_event; static LIST_HEAD(session_list); @@ -120,7 +119,6 @@ static inline void rfcomm_schedule(void) { if (!rfcomm_thread) return; - set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); wake_up_process(rfcomm_thread); } @@ -2038,19 +2036,18 @@ static int rfcomm_run(void *unused) rfcomm_add_listener(BDADDR_ANY); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { - /* No pending events. Let's sleep. - * Incoming connections and data will wake us up. */ - schedule(); - } - set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + break; /* Process stuff */ - clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); rfcomm_process_sessions(); + + schedule(); } + __set_current_state(TASK_RUNNING); rfcomm_kill_listener(); -- cgit v1.2.3 From 950e2d51e866623e4c360280aa63b85ab66d3403 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:41 -0400 Subject: Bluetooth: rfcomm: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept an rfcomm socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8f01e6b11a70..482722bbc7a0 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -485,11 +485,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; @@ -501,19 +496,20 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -521,8 +517,12 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 9be4e3fbf2d3603e7a7010ede0697166738a788b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:46 -0400 Subject: Bluetooth: Fix lost wakeups waiting for sock state change Fix race conditions which can cause lost wakeups while waiting for sock state to change. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8add9b499912..117e0d161780 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -494,9 +494,8 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); + set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { err = -EINPROGRESS; break; @@ -510,12 +509,13 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } -- cgit v1.2.3 From f9a3c20aa07462108fc6fd759dea956053f020bb Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:52 -0400 Subject: Bluetooth: l2cap: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or misssed signals) while waiting to accept an l2cap socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5c36b3e8739c..7d713b1c4cbd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -235,30 +235,26 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -266,8 +262,12 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 552b0d3cb9ff648aa503011ef50ca24019cd0f5f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:01 -0400 Subject: Bluetooth: sco: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept a sco socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4c3621b5e0aa..8270f05e3f1f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -564,30 +564,26 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(ch = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + ch = bt_accept_dequeue(sk, newsock); + if (ch) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -595,8 +591,12 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 38d57555616afcdad7381b02b523d494327494cd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:07 -0400 Subject: Bluetooth: bnep: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ca39fcf010ce..7e8ff3c24942 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -484,9 +484,11 @@ static int bnep_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); @@ -504,7 +506,7 @@ static int bnep_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ -- cgit v1.2.3 From 3a3f5c7df55a1294c9e6e2d0b8cea604b137438f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:10 -0400 Subject: Bluetooth: cmtp: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index c5b11af908be..2eb854ab10f6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -292,9 +292,11 @@ static int cmtp_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; if (sk->sk_state != BT_CONNECTED) break; @@ -307,7 +309,7 @@ static int cmtp_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); -- cgit v1.2.3 From a71a0cf4e9cdb1c43843977a1efc43f96f6efc21 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 25 Jul 2011 18:36:26 -0400 Subject: Bluetooth: l2cap: Fix lost wakeup waiting for ERTM acks Fix race condition which can result in missing wakeup during l2cap socket shutdown. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3204ba8a701c..b3bdb482bbe6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1159,9 +1159,8 @@ int __l2cap_wait_ack(struct sock *sk) int timeo = HZ/5; add_wait_queue(sk_sleep(sk), &wait); - while ((chan->unacked_frames > 0 && chan->conn)) { - set_current_state(TASK_INTERRUPTIBLE); - + set_current_state(TASK_INTERRUPTIBLE); + while (chan->unacked_frames > 0 && chan->conn) { if (!timeo) timeo = HZ/5; @@ -1173,6 +1172,7 @@ int __l2cap_wait_ack(struct sock *sk) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) -- cgit v1.2.3 From 6be6b11f006840ba7d8d4b959b3fa0c522f8468a Mon Sep 17 00:00:00 2001 From: Chen Ganir Date: Thu, 28 Jul 2011 15:42:09 +0300 Subject: Bluetooth: Fixed wrong L2CAP Sock timer value L2CAP connection timeout needs to be assigned as miliseconds and not as jiffies. Signed-off-by: Chen Ganir Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7d713b1c4cbd..61f1f623091d 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; sock_reset_flag(sk, SOCK_ZAPPED); -- cgit v1.2.3 From 7bdb8a5cf17f66614a9897645efcd4ccc27535ee Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 26 Jul 2011 22:46:54 +0200 Subject: Bluetooth: Don't use cmd_timer to timeout HCI reset command No command should be send before Command Complete event for HCI reset is received. This fix regression introduced by commit 6bd32326cda(Bluetooth: Use proper timer for hci command timout) for chips whose reset command takes longer to complete (e.g. CSR) resulting in next command being send before HCI reset completed. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fca62dcd7f1b..56943add45cc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg) BT_ERR("%s command tx timeout", hdev->name); atomic_set(&hdev->cmd_cnt, 1); - clear_bit(HCI_RESET, &hdev->flags); tasklet_schedule(&hdev->cmd_task); } @@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg) if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); - mod_timer(&hdev->cmd_timer, + if (test_bit(HCI_RESET, &hdev->flags)) + del_timer(&hdev->cmd_timer); + else + mod_timer(&hdev->cmd_timer, jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); -- cgit v1.2.3 From 751c10a56802513a6b057c8cf1552cecc1c9afde Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:41:35 -0400 Subject: Bluetooth: bnep: Fix deadlock in session deletion Commit f4d7cd4a4c introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by bnep_del_connection() which is waiting for the thread to exit -- deadlock. Use atomic_t/wake_up_process instead to signal to the thread to exit. Signed-off-by: Jaikumar Ganesh Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/bnep.h | 1 + net/bluetooth/bnep/core.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 8e6c06158f8e..e7ee5314f39a 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -155,6 +155,7 @@ struct bnep_session { unsigned int role; unsigned long state; unsigned long flags; + atomic_t terminate; struct task_struct *task; struct ethhdr eh; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 7e8ff3c24942..d9edfe8bf9d6 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -487,7 +487,7 @@ static int bnep_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&s->terminate)) break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { @@ -642,9 +642,10 @@ int bnep_del_connection(struct bnep_conndel_req *req) down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); - if (s) - kthread_stop(s->task); - else + if (s) { + atomic_inc(&s->terminate); + wake_up_process(s->task); + } else err = -ENOENT; up_read(&bnep_session_sem); -- cgit v1.2.3 From 7176522cdca1f0b78a1434b41761f0334511822a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:44:21 -0400 Subject: Bluetooth: cmtp: Fix deadlock in session deletion Commit fada4ac339 introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by cmtp_del_connection() which is waiting for the thread to exit -- deadlock. Revert cmtp_reset_ctr to its original behavior: non-blocking signalling for the session to terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/capi.c | 3 ++- net/bluetooth/cmtp/cmtp.h | 1 + net/bluetooth/cmtp/core.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 040f67b12978..50f0d135eb8f 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -386,7 +386,8 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl) capi_ctr_down(ctrl); - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp) diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index db43b54ac9af..c32638dddbf9 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -81,6 +81,7 @@ struct cmtp_session { char name[BTNAMSIZ]; + atomic_t terminate; struct task_struct *task; wait_queue_head_t wait; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 2eb854ab10f6..42cb2f4c0db1 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -295,7 +295,7 @@ static int cmtp_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&session->terminate)) break; if (sk->sk_state != BT_CONNECTED) break; @@ -416,7 +416,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) skb_queue_purge(&session->transmit); /* Stop session thread */ - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } else err = -ENOENT; -- cgit v1.2.3 From e9d5cb541b22aa651edc29990092ec5f8174cd39 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:26 -0400 Subject: Bluetooth: hidp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 43b4c2deb7cc..7e19a012970e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1044,8 +1044,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, } err = hid_add_device(session->hid); - if (err < 0) - goto err_add_device; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&hidp_session_sem); + return err; + } if (session->input) { hidp_send_ctrl_message(session, @@ -1059,12 +1063,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, up_write(&hidp_session_sem); return 0; -err_add_device: - hid_destroy_device(session->hid); - session->hid = NULL; - atomic_inc(&session->terminate); - wake_up_process(session->task); - unlink: hidp_del_timer(session); -- cgit v1.2.3 From 1c97e94c0b7c56319754ee6f9ccd2e93fe1ee2b3 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:34 -0400 Subject: Bluetooth: hidp: Fix memory leak of cached report descriptor Free the cached HID report descriptor on thread terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7e19a012970e..26f0d109ff41 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -764,6 +764,7 @@ static int hidp_session(void *arg) up_write(&hidp_session_sem); + kfree(session->rd_data); kfree(session); return 0; } -- cgit v1.2.3 From 615aedd6e5add8104f031b0d547285652d04d330 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:50 -0400 Subject: Bluetooth: hidp: Only free input device if failed register When an hidp connection is added for a boot protocol input device, only free the allocated device if device registration fails. Subsequent failures should only unregister the device (the input device api documents that unregister will also free the allocated device). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 26f0d109ff41..a859f9078df6 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -842,6 +842,8 @@ static int hidp_setup_input(struct hidp_session *session, err = input_register_device(input); if (err < 0) { + input_free_device(input); + session->input = NULL; hci_conn_put_device(session->conn); return err; } @@ -1089,7 +1091,6 @@ purge: failed: up_write(&hidp_session_sem); - input_free_device(session->input); kfree(session); return err; } -- cgit v1.2.3 From ff062ea109217329b88693bc9081da893eb8b71b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:52:01 -0400 Subject: Bluetooth: hidp: Don't release device ref if never held When an hidp connection is added for a boot protocol input device, don't release a device reference that was never acquired. The device reference is acquired when the session is linked to the session list (which hasn't happened yet when hidp_setup_input is called). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a859f9078df6..fb68f344c34a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -844,7 +844,6 @@ static int hidp_setup_input(struct hidp_session *session, if (err < 0) { input_free_device(input); session->input = NULL; - hci_conn_put_device(session->conn); return err; } -- cgit v1.2.3 From 687beaa0d1d937c327e2f97b4b4fa6c23ca70624 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:53:52 -0400 Subject: Bluetooth: cmtp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 42cb2f4c0db1..521baa4fe835 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -382,16 +382,17 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) if (!(session->flags & (1 << CMTP_LOOPBACK))) { err = cmtp_attach_device(session); - if (err < 0) - goto detach; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&cmtp_session_sem); + return err; + } } up_write(&cmtp_session_sem); return 0; -detach: - cmtp_detach_device(session); - unlink: __cmtp_unlink_session(session); -- cgit v1.2.3 From c2bceb3d7f145af5a0916bea700f2f9d380901ea Mon Sep 17 00:00:00 2001 From: Lionel Elie Mamane Date: Sat, 13 Aug 2011 14:04:38 +0000 Subject: sit tunnels: propagate IPv6 transport class to IPv4 Type of Service sit tunnels (IPv6 tunnel over IPv4) do not implement the "tos inherit" case to copy the IPv6 transport class byte from the inner packet to the IPv4 type of service byte in the outer packet. By contrast, ipip tunnels and GRE tunnels do. This patch, adapted from the similar code in net/ipv4/ipip.c and net/ipv4/ip_gre.c, implements that. This patch applies to 3.0.1, and has been tested on that version. Signed-off-by: Lionel Elie Mamane Signed-off-by: David S. Miller --- net/ipv6/sit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 07bf1085458f..00b15ac7a702 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + if (tos == 1) + tos = ipv6_get_dsfield(iph6); + /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; -- cgit v1.2.3 From 8919bc13e8d92c5b082c5c0321567383a071f5bc Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 15 Aug 2011 05:25:40 +0000 Subject: net_sched: fix port mirror/redirect stats reporting When a redirected or mirrored packet is dropped by the target device we need to record statistics. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 102fc212cd64..e051398fdf6b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - dev_queue_xmit(skb2); - err = 0; + err = dev_queue_xmit(skb2); out: if (err) { -- cgit v1.2.3 From c5114cd59d2664f258b0d021d79b1532d94bdc2b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 18 Aug 2011 21:29:27 -0700 Subject: vlan: reset headers on accel emulation path It's after all necessary to do reset headers here. The reason is we cannot depend that it gets reseted in __netif_receive_skb once skb is reinjected. For incoming vlanids without vlan_dev, vlan_do_receive() returns false with skb != NULL and __netif_reveive_skb continues, skb is not reinjected. This might be good material for 3.0-stable as well Reported-by: Mike Auty Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 5f27f8e30254..f1f2f7bb6661 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) if (unlikely(!skb)) goto err_free; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); return skb; err_free: -- cgit v1.2.3 From 98e77438aed3cd3343cbb86825127b1d9d2bea33 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 19 Aug 2011 03:19:07 -0700 Subject: ipv6: Fix ipv6_getsockopt for IPV6_2292PKTOPTIONS IPV6_2292PKTOPTIONS is broken for 32-bit applications running in COMPAT mode on 64-bit kernels. The same problem was fixed for IPv4 with the patch: ipv4: Fix ip_getsockopt for IP_PKTOPTIONS, commit dd23198e58cd35259dd09e8892bbdb90f1d57748 Signed-off-by: Sorin Dumitru Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9cb191ecaba8..147ede38ab48 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct ipv6_pinfo *np = inet6_sk(sk); int len; @@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; lock_sock(sk); skb = np->pktoptions; @@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if(level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { @@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ipv6_getsockopt); - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { -- cgit v1.2.3 From fbe5e29ec1886967255e76946aaf537b8cc9b81e Mon Sep 17 00:00:00 2001 From: Daniel Schwierzeck Date: Fri, 19 Aug 2011 12:04:20 +0000 Subject: atm: br2684: Fix oops due to skb->dev being NULL This oops have been already fixed with commit 27141666b69f535a4d63d7bc6d9e84ee5032f82a atm: [br2684] Fix oops due to skb->dev being NULL It happens that if a packet arrives in a VC between the call to open it on the hardware and the call to change the backend to br2684, br2684_regvcc processes the packet and oopses dereferencing skb->dev because it is NULL before the call to br2684_push(). but have been introduced again with commit b6211ae7f2e56837c6a4849316396d1535606e90 atm: Use SKB queue and list helpers instead of doing it by-hand. Signed-off-by: Daniel Schwierzeck Signed-off-by: David S. Miller --- net/atm/br2684.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 52cfd0c3ea71..d07223c834af 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) spin_unlock_irqrestore(&rq->lock, flags); skb_queue_walk_safe(&queue, skb, tmp) { - struct net_device *dev = skb->dev; + struct net_device *dev; + + br2684_push(atmvcc, skb); + dev = skb->dev; dev->stats.rx_bytes -= skb->len; dev->stats.rx_packets--; - - br2684_push(atmvcc, skb); } /* initialize netdev carrier state */ -- cgit v1.2.3 From ecb4433550f0620f3d1471ae7099037ede30a91e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 12 Aug 2011 14:00:59 +0200 Subject: mac80211: fix suspend/resume races with unregister hw Do not call ->suspend, ->resume methods after we unregister wiphy. Also delete sta_clanup timer after we finish wiphy unregister to avoid this: WARNING: at lib/debugobjects.c:262 debug_print_object+0x85/0xa0() Hardware name: 6369CTO ODEBUG: free active (active state 0) object type: timer_list hint: sta_info_cleanup+0x0/0x180 [mac80211] Modules linked in: aes_i586 aes_generic fuse bridge stp llc autofs4 sunrpc cpufreq_ondemand acpi_cpufreq mperf ext2 dm_mod uinput thinkpad_acpi hwmon sg arc4 rt2800usb rt2800lib crc_ccitt rt2x00usb rt2x00lib mac80211 cfg80211 i2c_i801 iTCO_wdt iTCO_vendor_support e1000e ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom yenta_socket ahci libahci pata_acpi ata_generic ata_piix i915 drm_kms_helper drm i2c_algo_bit video [last unloaded: microcode] Pid: 5663, comm: pm-hibernate Not tainted 3.1.0-rc1-wl+ #19 Call Trace: [] warn_slowpath_common+0x6d/0xa0 [] ? debug_print_object+0x85/0xa0 [] ? debug_print_object+0x85/0xa0 [] warn_slowpath_fmt+0x2e/0x30 [] debug_print_object+0x85/0xa0 [] ? sta_info_alloc+0x1a0/0x1a0 [mac80211] [] debug_check_no_obj_freed+0xe2/0x180 [] kfree+0x8b/0x150 [] cfg80211_dev_free+0x7e/0x90 [cfg80211] [] wiphy_dev_release+0xd/0x10 [cfg80211] [] device_release+0x19/0x80 [] kobject_release+0x7a/0x1c0 [] ? rtnl_unlock+0x8/0x10 [] ? wiphy_resume+0x6b/0x80 [cfg80211] [] ? kobject_del+0x30/0x30 [] kref_put+0x2d/0x60 [] kobject_put+0x1d/0x50 [] ? mutex_lock+0x14/0x40 [] put_device+0xf/0x20 [] dpm_resume+0xca/0x160 [] hibernation_snapshot+0xcd/0x260 [] ? freeze_processes+0x3f/0x90 [] hibernate+0xcb/0x1e0 [] ? pm_async_store+0x40/0x40 [] state_store+0xa0/0xb0 [] ? pm_async_store+0x40/0x40 [] kobj_attr_store+0x20/0x30 [] sysfs_write_file+0x94/0xf0 [] vfs_write+0x9a/0x160 [] ? sysfs_open_file+0x200/0x200 [] sys_write+0x3d/0x70 [] sysenter_do_call+0x12/0x28 Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- include/net/cfg80211.h | 3 +++ net/mac80211/main.c | 2 +- net/wireless/core.c | 7 +++++++ net/wireless/sysfs.c | 6 ++++-- 4 files changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d17f47fc9e31..408ae4882d22 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1865,6 +1865,9 @@ struct wiphy { * you need use set_wiphy_dev() (see below) */ struct device dev; + /* protects ->resume, ->suspend sysfs callbacks against unregister hw */ + bool registered; + /* dir in debugfs: ieee80211/ */ struct dentry *debugfsdir; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269183cf..acb44230b251 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1012,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); - sta_info_stop(local); rate_control_deinitialize(local); if (skb_queue_len(&local->skb_queue) || @@ -1024,6 +1023,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); + sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); diff --git a/net/wireless/core.c b/net/wireless/core.c index 645437cfc464..c14865172da7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -616,6 +616,9 @@ int wiphy_register(struct wiphy *wiphy) if (res) goto out_rm_dev; + rtnl_lock(); + rdev->wiphy.registered = true; + rtnl_unlock(); return 0; out_rm_dev: @@ -647,6 +650,10 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + rtnl_lock(); + rdev->wiphy.registered = false; + rtnl_unlock(); + rfkill_unregister(rdev->rfkill); /* protect the device list */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index c6e4ca6a7d2e..ff574597a854 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -93,7 +93,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) if (rdev->ops->suspend) { rtnl_lock(); - ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); + if (rdev->wiphy.registered) + ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); rtnl_unlock(); } @@ -112,7 +113,8 @@ static int wiphy_resume(struct device *dev) if (rdev->ops->resume) { rtnl_lock(); - ret = rdev->ops->resume(&rdev->wiphy); + if (rdev->wiphy.registered) + ret = rdev->ops->resume(&rdev->wiphy); rtnl_unlock(); } -- cgit v1.2.3 From 11f3a6bdc2528d1ce2af50202dbf7138fdee1b34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 06:05:59 +0000 Subject: bridge: fix a possible net_device leak Jan Beulich reported a possible net_device leak in bridge code after commit bb900b27a2f4 (bridge: allow creating bridge devices with netlink) Reported-by: Jan Beulich Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2cdf0070419f..e73815456adf 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; + int res; dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup); @@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); - return register_netdev(dev); + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; } int br_del_bridge(struct net *net, const char *name) -- cgit v1.2.3 From e05c4ad3ed874ee4f5e2c969e55d318ec654332c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:37 +0000 Subject: mcast: Fix source address selection for multicast listener report Should check use count of include mode filter instead of total number of include mode filters. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- net/ipv6/mcast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 283c0a26e03f..d577199eabd5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) break; for (i=0; isfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e6ebcdb4779..ee7839f4d6e3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, break; for (i=0; imca_sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; -- cgit v1.2.3 From 4b275d7efa1c4412f0d572fcd7f78ed0919370b3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:33 +0000 Subject: bridge: Pseudo-header required for the checksum of ICMPv6 Checksum of ICMPv6 is not properly computed because the pseudo header is not used. Thus, the MLD packet gets dropped by the bridge. Signed-off-by: Zheng Yan Reported-by: Ang Way Chuang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2d85ca7111d3..22d2d1af1c83 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) goto out; + err = -EINVAL; } + ip6h = ipv6_hdr(skb2); + switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, + IPPROTO_ICMPV6, skb2->csum)) break; /*FALLTHROUGH*/ case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) + skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb2->len, + IPPROTO_ICMPV6, 0)); + if (__skb_checksum_complete(skb2)) goto out; } -- cgit v1.2.3 From 22df13319d1fec30b8f9bcaadc295829647109bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2011 19:57:05 +0000 Subject: bridge: fix a possible use after free br_multicast_ipv6_rcv() can call pskb_trim_rcsum() and therefore skb head can be reallocated. Cache icmp6_type field instead of dereferencing twice the struct icmp6hdr pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22d2d1af1c83..995cbe0ac0b2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, { struct sk_buff *skb2; const struct ipv6hdr *ip6h; - struct icmp6hdr *icmp6h; + u8 icmp6_type; u8 nexthdr; unsigned len; int offset; @@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - icmp6h = icmp6_hdr(skb2); + icmp6_type = icmp6_hdr(skb2)->icmp6_type; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: @@ -1544,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->igmp = 1; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_REPORT: { struct mld_msg *mld; -- cgit v1.2.3 From 20e6074eb8e096b3a595c093d1cb222f378cd671 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 19:32:42 +0000 Subject: arp: fix rcu lockdep splat in arp_process() Dave Jones reported a lockdep splat triggered by an arp_process() call from parp_redo(). Commit faa9dcf793be (arp: RCU changes) is the origin of the bug, since it assumed arp_process() was called under rcu_read_lock(), which is not true in this particular path. Instead of adding rcu_read_lock() in parp_redo(), I chose to add it in neigh_proxy_process() to take care of IPv6 side too. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- include/linux/inetdevice.h:209 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by setfiles/2123: #0: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: [] walk_component+0x1ef/0x3e8 #1: (&isec->lock){+.+.+.}, at: [] inode_doinit_with_dentry+0x3f/0x41f #2: (&tbl->proxy_timer){+.-...}, at: [] run_timer_softirq+0x157/0x372 #3: (class){+.-...}, at: [] neigh_proxy_process +0x36/0x103 stack backtrace: Pid: 2123, comm: setfiles Tainted: G W 3.1.0-0.rc2.git7.2.fc16.x86_64 #1 Call Trace: [] lockdep_rcu_dereference+0xa7/0xaf [] __in_dev_get_rcu+0x55/0x5d [] arp_process+0x25/0x4d7 [] parp_redo+0xe/0x10 [] neigh_proxy_process+0x9a/0x103 [] run_timer_softirq+0x218/0x372 [] ? run_timer_softirq+0x157/0x372 [] ? neigh_stat_seq_open+0x41/0x41 [] ? mark_held_locks+0x6d/0x95 [] __do_softirq+0x112/0x25a [] call_softirq+0x1c/0x30 [] do_softirq+0x4b/0xa2 [] irq_exit+0x5d/0xcf [] smp_apic_timer_interrupt+0x7c/0x8a [] apic_timer_interrupt+0x73/0x80 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? __slab_free+0x30/0x24c [] ? __slab_free+0x2e/0x24c [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] kfree+0x108/0x131 [] inode_doinit_with_dentry+0x2e9/0x41f [] selinux_d_instantiate+0x1c/0x1e [] security_d_instantiate+0x21/0x23 [] d_instantiate+0x5c/0x61 [] d_splice_alias+0xbc/0xd2 [] ext4_lookup+0xba/0xeb [] d_alloc_and_lookup+0x45/0x6b [] walk_component+0x215/0x3e8 [] lookup_last+0x3b/0x3d [] path_lookupat+0x82/0x2af [] ? might_fault+0xa5/0xac [] ? might_fault+0x5c/0xac [] ? getname_flags+0x31/0x1ca [] do_path_lookup+0x28/0x97 [] user_path_at+0x59/0x96 [] ? cp_new_stat+0xf7/0x10d [] vfs_fstatat+0x44/0x6e [] vfs_lstat+0x1e/0x20 [] sys_newlstat+0x1a/0x33 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0bb203..1334d7e56f02 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg) if (tdif <= 0) { struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); tbl->proxy_redo(skb); - else + rcu_read_unlock(); + } else { kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) -- cgit v1.2.3 From bc909d9ddbf7778371e36a651d6e4194b1cc7d4c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Aug 2011 19:45:03 -0700 Subject: sendmmsg/sendmsg: fix unsafe user pointer access Dereferencing a user pointer directly from kernel-space without going through the copy_from_user family of functions is a bad idea. Two of such usages can be found in the sendmsg code path called from sendmmsg, added by commit c71d8ebe7a4496fb7231151cb70a6baa0cb56f9a upstream. commit 5b47b8038f183b44d2d8ff1c7d11a5c1be706b34 in the 3.0-stable tree. Usages are performed through memcmp() and memcpy() directly. Fix those by using the already copied msg_sys structure instead of the __user *msg structure. Note that msg_sys can be set to NULL by verify_compat_iovec() or verify_iovec(), which requires additional NULL pointer checks. Signed-off-by: Mathieu Desnoyers Signed-off-by: David Goulet CC: Tetsuo Handa CC: Anton Blanchard CC: David S. Miller CC: stable Signed-off-by: David S. Miller --- net/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 24a77400b65e..ffe92ca32f2a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1965,8 +1965,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, * used_address->name_len is initialized to UINT_MAX so that the first * destination address never matches. */ - if (used_address && used_address->name_len == msg_sys->msg_namelen && - !memcmp(&used_address->name, msg->msg_name, + if (used_address && msg_sys->msg_name && + used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { err = sock_sendmsg_nosec(sock, msg_sys, total_len); goto out_freectl; @@ -1978,8 +1979,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, */ if (used_address && err >= 0) { used_address->name_len = msg_sys->msg_namelen; - memcpy(&used_address->name, msg->msg_name, - used_address->name_len); + if (msg_sys->msg_name) + memcpy(&used_address->name, msg_sys->msg_name, + used_address->name_len); } out_freectl: -- cgit v1.2.3 From e9278a475f845833b569ca47171e64fe48c616e0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Aug 2011 06:26:15 +0000 Subject: netpoll: fix incorrect access to skb data in __netpoll_rx __netpoll_rx() doesnt properly handle skbs with small header pskb_may_pull() or pskb_trim_rcsum() can change skb->data, we must reload it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index adf84dd8c7b5..52622517e0d8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb) if (skb_shared(skb)) goto out; - iph = (struct iphdr *)skb->data; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; + iph = (struct iphdr *)skb->data; if (iph->ihl < 5 || iph->version != 4) goto out; if (!pskb_may_pull(skb, iph->ihl*4)) goto out; + iph = (struct iphdr *)skb->data; if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) goto out; @@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb) if (pskb_trim_rcsum(skb, len)) goto out; + iph = (struct iphdr *)skb->data; if (iph->protocol != IPPROTO_UDP) goto out; -- cgit v1.2.3 From c6675233f9015d3c0460c8aab53ed9b99d915c64 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Aug 2011 15:01:20 +0200 Subject: netfilter: nf_queue: reject NF_STOLEN verdicts from userspace A userspace listener may send (bogus) NF_STOLEN verdict, which causes skb leak. This problem was previously fixed via 64507fdbc29c3a622180378210ecea8659b14e40 (netfilter: nf_queue: fix NF_STOLEN skb leak) but this had to be reverted because NF_STOLEN can also be returned by a netfilter hook when iterating the rules in nf_reinject. Reject userspace NF_STOLEN verdict, as suggested by Michal Miroslaw. This is complementary to commit fad54440438a7c231a6ae347738423cbabc936d9 (netfilter: avoid double free in nf_reinject). Cc: Julian Anastasov Cc: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 11 ++++------- net/ipv6/netfilter/ip6_queue.c | 11 ++++------- net/netfilter/nfnetlink_queue.c | 4 ++-- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 48f7d5b4ff37..e59aabd0eae4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -314,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -359,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 87b243a25afa..e63c3972a739 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -314,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { struct nf_queue_entry *entry; - if (vmsg->value > NF_MAX_VERDICT) + if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN) return -EINVAL; entry = ipq_find_dequeue_entry(vmsg->id); @@ -359,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, break; case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; + status = ipq_set_verdict(&pmsg->msg.verdict, + len - sizeof(*pmsg)); + break; default: status = -EINVAL; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 00bd475eab4b..a80b0cb03f17 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[]) return NULL; vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) return NULL; return vhdr; } -- cgit v1.2.3 From 4c6e4209662b2a4147cde16c2144a253a7430a49 Mon Sep 17 00:00:00 2001 From: Sanket Shah Date: Tue, 30 Aug 2011 15:23:03 +0200 Subject: netfilter: nf_ct_pptp: fix DNATed PPTP connection address translation When both the server and the client are NATed, the set-link-info control packet containing the peer's call-id field is not properly translated. I have verified that it was working in 2.6.16.13 kernel previously but due to rewrite, this scenario stopped working (Not knowing exact version when it stopped working). Signed-off-by: Sanket Shah Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_pptp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 2fd4565144de..31d56b23b9e9 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb, break; case PPTP_WAN_ERROR_NOTIFY: + case PPTP_SET_LINK_INFO: case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ -- cgit v1.2.3 From 4a5cc84ae7e19fb7a72a30332ba67af43e0ad1ad Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Aug 2011 15:45:10 +0200 Subject: netfilter: nf_ct_tcp: fix incorrect handling of invalid TCP option Michael M. Builov reported that in the tcp_options and tcp_sack functions of netfilter TCP conntrack the incorrect handling of invalid TCP option with too big opsize may lead to read access beyond tcp-packet or buffer allocated on stack (netfilter bugzilla #738). The fix is to stop parsing the options at detecting the broken option. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 37bf94394be0..afc4ab7cfe01 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) @@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, if (opsize < 2) /* "silly options" */ return; if (opsize > length) - break; /* don't parse partial options */ + return; /* don't parse partial options */ if (opcode == TCPOPT_SACK && opsize >= (TCPOLEN_SACK_BASE -- cgit v1.2.3 From bb9fc37358ffa9de1cc2b2b6f1a559b926ef50d9 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 30 Aug 2011 15:46:13 +0200 Subject: netfilter: nf_ct_tcp: wrong multiplication of TCPOLEN_TSTAMP_ALIGNED in tcp_sack skips fastpath The wrong multiplication of TCPOLEN_TSTAMP_ALIGNED by 4 skips the fast path for the timestamp-only option. Bug reported by Michael M. Builov (netfilter bugzilla #738). Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index afc4ab7cfe01..8235b86b4e87 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, BUG_ON(ptr == NULL); /* Fast path for timestamp-only option */ - if (length == TCPOLEN_TSTAMP_ALIGNED*4 + if (length == TCPOLEN_TSTAMP_ALIGNED && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) -- cgit v1.2.3 From ec0506dbe4e240ecd4c32bf74c84a88ce1ddb414 Mon Sep 17 00:00:00 2001 From: Maciej Å»enczykowski Date: Sun, 28 Aug 2011 12:35:31 +0000 Subject: net: relax PKTINFO non local ipv6 udp xmit check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow transparent sockets to be less restrictive about the source ip of ipv6 udp packets being sent. Google-Bug-Id: 5018138 Signed-off-by: Maciej Å»enczykowski CC: "Erik Kline" CC: "Lorenzo Colitti" Signed-off-by: David S. Miller --- include/net/transp_v6.h | 1 + net/ipv6/datagram.c | 5 +++-- net/ipv6/ip6_flowlabel.c | 8 ++++---- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 6 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 5271a741c3a3..498433dd067d 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -39,6 +39,7 @@ extern int datagram_recv_ctl(struct sock *sk, struct sk_buff *skb); extern int datagram_send_ctl(struct net *net, + struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ef1831746ef..b46e9f88ce37 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -599,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct net *net, +int datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) @@ -658,7 +658,8 @@ int datagram_send_ctl(struct net *net, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + if (!inet_sk(sk)->transparent && + !ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f3caf1b8d572..543039450193 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo } static struct ip6_flowlabel * -fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, - int optlen, int *err_p) +fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, + char __user *optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; @@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, + err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; @@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(net, &freq, optval, optlen, &err); + fl = fl_create(net, sk, &freq, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 147ede38ab48..2fbda5fc4cc4 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, + retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6a79f3081bdb..343852e5c703 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b51c499..bb95e8e1c6f9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1090,8 +1090,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, - &tclass, &dontfrag); + err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; -- cgit v1.2.3 From 29c486df6a208432b370bd4be99ae1369ede28d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 18:57:00 -0400 Subject: net: ipv4: relax AF_INET check in bind() commit d0733d2e29b65 (Check for mistakenly passed in non-IPv4 address) added regression on legacy apps that use bind() with AF_UNSPEC family. Relax the check, but make sure the bind() is done on INADDR_ANY addresses, as AF_UNSPEC has probably no sane meaning for other addresses. Bugzilla reference : https://bugzilla.kernel.org/show_bug.cgi?id=42012 Signed-off-by: Eric Dumazet Reported-and-bisected-by: Rene Meier CC: Marcus Meissner Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b745d412cf6..dd2b9478ddd1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; if (addr->sin_family != AF_INET) { + /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) + * only if s_addr is INADDR_ANY. + */ err = -EAFNOSUPPORT; - goto out; + if (addr->sin_family != AF_UNSPEC || + addr->sin_addr.s_addr != htonl(INADDR_ANY)) + goto out; } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); -- cgit v1.2.3 From aca420bc51f48b0701963ba3a6234442a0cabebd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 31 Aug 2011 14:45:53 -0700 Subject: libceph: fix leak of osd structs during shutdown We want to remove all OSDs, not just those on the idle LRU. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ce310eee708d..16836a7df7a6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -685,6 +685,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) put_osd(osd); } +static void remove_all_osds(struct ceph_osd_client *osdc) +{ + dout("__remove_old_osds %p\n", osdc); + mutex_lock(&osdc->request_mutex); + while (!RB_EMPTY_ROOT(&osdc->osds)) { + struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), + struct ceph_osd, o_node); + __remove_osd(osdc, osd); + } + mutex_unlock(&osdc->request_mutex); +} + static void __move_osd_to_lru(struct ceph_osd_client *osdc, struct ceph_osd *osd) { @@ -701,14 +713,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd) list_del_init(&osd->o_osd_lru); } -static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) +static void remove_old_osds(struct ceph_osd_client *osdc) { struct ceph_osd *osd, *nosd; dout("__remove_old_osds %p\n", osdc); mutex_lock(&osdc->request_mutex); list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { - if (!remove_all && time_before(jiffies, osd->lru_ttl)) + if (time_before(jiffies, osd->lru_ttl)) break; __remove_osd(osdc, osd); } @@ -751,6 +763,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) struct rb_node *parent = NULL; struct ceph_osd *osd = NULL; + dout("__insert_osd %p osd%d\n", new, new->o_osd); while (*p) { parent = *p; osd = rb_entry(parent, struct ceph_osd, o_node); @@ -1144,7 +1157,7 @@ static void handle_osds_timeout(struct work_struct *work) dout("osds timeout\n"); down_read(&osdc->map_sem); - remove_old_osds(osdc, 0); + remove_old_osds(osdc); up_read(&osdc->map_sem); schedule_delayed_work(&osdc->osds_timeout_work, @@ -1862,8 +1875,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = NULL; } - remove_old_osds(osdc, 1); - WARN_ON(!RB_EMPTY_ROOT(&osdc->osds)); + remove_all_osds(osdc); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); -- cgit v1.2.3 From b49d8b5d7007a673796f3f99688b46931293873e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Aug 2011 16:56:04 +0000 Subject: net/9p: Fix kernel crash with msize 512K With msize equal to 512K (PAGE_SIZE * VIRTQUEUE_NUM), we hit multiple crashes. This patch fix those. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 175b5135bdcf..e317583fcc73 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int in, out, inp, outp; struct virtio_chan *chan = client->trans; - char *rdata = (char *)req->rc+sizeof(struct p9_fcall); unsigned long flags; size_t pdata_off = 0; struct trans_rpage_info *rpinfo = NULL; @@ -346,7 +345,8 @@ req_retry_pinned: * Arrange in such a way that server places header in the * alloced memory and payload onto the user buffer. */ - inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11); + inp = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, 11); /* * Running executables in the filesystem may result in * a read request with kernel buffer as opposed to user buffer. @@ -366,8 +366,8 @@ req_retry_pinned: } in += inp; } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, - req->rc->capacity); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, + req->rc->sdata, req->rc->capacity); } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); @@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = { .close = p9_virtio_close, .request = p9_virtio_request, .cancel = p9_virtio_cancel, - .maxsize = PAGE_SIZE*VIRTQUEUE_NUM, + + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, -- cgit v1.2.3 From 4bae7d976976fa52d345805ba686934cd548343e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Sep 2011 12:47:39 +0200 Subject: mac80211: fix missing sta_lock in __sta_info_destroy Since my commit 34e895075e21be3e21e71d6317440d1ee7969ad0 ("mac80211: allow station add/remove to sleep") there is a race in mac80211 when it clears the TIM bit because a sleeping station disconnected, the spinlock isn't held around the relevant code any more. Use the right API to acquire the spinlock correctly. Cc: stable@kernel.org [2.6.34+] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 3db78b696c5c..21070e9bc8d0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -665,7 +665,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) BUG_ON(!sdata->bss); atomic_dec(&sdata->bss->num_sta_ps); - __sta_info_clear_tim_bit(sdata->bss, sta); + sta_info_clear_tim_bit(sta); } local->num_sta--; -- cgit v1.2.3 From 2d20a26a92f72e3bb658fe8ce99c3663756e9e7a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 30 Aug 2011 15:52:18 +0200 Subject: Bluetooth: Fix timeout on scanning for the second time The checks for HCI_INQUIRY and HCI_MGMT were in the wrong order, so that second scans always failed. Signed-off-by: Oliver Neukum Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a40170e022e8..7ef4eb4435fb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -58,8 +58,8 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); @@ -76,8 +76,8 @@ static void hci_cc_exit_periodic_inq(struct hci_dev *hdev, struct sk_buff *skb) if (status) return; - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_conn_check_pending(hdev); @@ -959,9 +959,8 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) return; } - if (test_bit(HCI_MGMT, &hdev->flags) && - !test_and_set_bit(HCI_INQUIRY, - &hdev->flags)) + if (!test_and_set_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 1); } @@ -1340,8 +1339,8 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, status); - if (test_bit(HCI_MGMT, &hdev->flags) && - test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) + if (test_and_clear_bit(HCI_INQUIRY, &hdev->flags) && + test_bit(HCI_MGMT, &hdev->flags)) mgmt_discovering(hdev->id, 0); hci_req_complete(hdev, HCI_OP_INQUIRY, status); -- cgit v1.2.3 From 27e95a8c670e0c587990ec5b9a87a7ea17873d28 Mon Sep 17 00:00:00 2001 From: Igor Maravić Date: Tue, 30 Aug 2011 03:12:55 +0000 Subject: pkt_sched: cls_rsvp.h was outdated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit File cls_rsvp.h in /net/sched was outdated. I'm sending you patch for this file. [ tb[] array should be indexed by X not X-1 -DaveM ] Signed-off-by: Igor Maravić Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index be4505ee67a9..b01427924f81 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct rsvp_filter *f, **fp; struct rsvp_session *s, **sp; struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS-1]; + struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_RSVP_MAX + 1]; struct tcf_exts e; unsigned int h1, h2; @@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map); if (err < 0) return err; @@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (f->handle != handle && handle) goto errout2; - if (tb[TCA_RSVP_CLASSID-1]) { - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); tcf_bind_filter(tp, &f->res, base); } @@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, err = -EINVAL; if (handle) goto errout2; - if (tb[TCA_RSVP_DST-1] == NULL) + if (tb[TCA_RSVP_DST] == NULL) goto errout2; err = -ENOBUFS; @@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout2; h2 = 16; - if (tb[TCA_RSVP_SRC-1]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); + if (tb[TCA_RSVP_SRC]) { + memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); h2 = hash_src(f->src); } - if (tb[TCA_RSVP_PINFO-1]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO-1]); + if (tb[TCA_RSVP_PINFO]) { + pinfo = nla_data(tb[TCA_RSVP_PINFO]); f->spi = pinfo->spi; f->tunnelhdr = pinfo->tunnelhdr; } - if (tb[TCA_RSVP_CLASSID-1]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); + if (tb[TCA_RSVP_CLASSID]) + f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - dst = nla_data(tb[TCA_RSVP_DST-1]); + dst = nla_data(tb[TCA_RSVP_DST]); h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); err = -ENOMEM; @@ -642,8 +642,7 @@ nla_put_failure: return -1; } -static struct tcf_proto_ops RSVP_OPS = { - .next = NULL, +static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, .init = rsvp_init, -- cgit v1.2.3 From 946cedccbd7387488d2cee5da92cdfeb28d2e670 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 03:21:44 +0000 Subject: tcp: Change possible SYN flooding messages "Possible SYN flooding on port xxxx " messages can fill logs on servers. Change logic to log the message only once per listener, and add two new SNMP counters to track : TCPReqQFullDoCookies : number of times a SYNCOOKIE was replied to client TCPReqQFullDrop : number of times a SYN request was dropped because syncookies were not enabled. Based on a prior patch from Tom Herbert, and suggestions from David. Signed-off-by: Eric Dumazet CC: Tom Herbert Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ include/net/request_sock.h | 3 ++- include/net/tcp.h | 3 +++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 49 ++++++++++++++++++++++++++-------------------- net/ipv6/tcp_ipv6.c | 31 +++-------------------------- 6 files changed, 40 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 12b2b18e50c1..e16557a357e5 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -231,6 +231,8 @@ enum LINUX_MIB_TCPDEFERACCEPTDROP, LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */ LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ + LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ + LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ __LINUX_MIB_MAX }; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 99e6e19b57c2..4c0766e201e3 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog; */ struct listen_sock { u8 max_qlen_log; - /* 3 bytes hole, try to use */ + u8 synflood_warned; + /* 2 bytes hole, try to use */ int qlen; int qlen_young; int clock_hand; diff --git a/include/net/tcp.h b/include/net/tcp.h index 149a415d1e0a..e9b48b094683 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -460,6 +460,9 @@ extern int tcp_write_wakeup(struct sock *); extern void tcp_send_fin(struct sock *sk); extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); extern int tcp_send_synack(struct sock *); +extern int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto); extern void tcp_push_one(struct sock *, unsigned int mss_now); extern void tcp_send_ack(struct sock *sk); extern void tcp_send_delayed_ack(struct sock *sk); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b14ec7d03b6e..4bfad5da94f4 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), + SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), + SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c12b8ec849d..c34f01513945 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static void syn_flood_warning(const struct sk_buff *skb) +/* + * Return 1 if a syncookie should be sent + */ +int tcp_syn_flood_action(struct sock *sk, + const struct sk_buff *skb, + const char *proto) { - const char *msg; + const char *msg = "Dropping request"; + int want_cookie = 0; + struct listen_sock *lopt; + + #ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) + if (sysctl_tcp_syncookies) { msg = "Sending cookies"; - else + want_cookie = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); + } else #endif - msg = "Dropping request"; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - pr_info("TCP: Possible SYN flooding on port %d. %s.\n", - ntohs(tcp_hdr(skb)->dest), msg); + lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; + if (!lopt->synflood_warned) { + lopt->synflood_warned = 1; + pr_info("%s: Possible SYN flooding on port %d. %s. " + " Check SNMP counters.\n", + proto, ntohs(tcp_hdr(skb)->dest), msg); + } + return want_cookie; } +EXPORT_SYMBOL(tcp_syn_flood_action); /* * Save and compile IPv4 options into the request_sock if needed. @@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ -#endif /* Never answer to SYNs send to broadcast or multicast */ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) @@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { - want_cookie = 1; - } else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); + if (!want_cookie) + goto drop; } /* Accept backlog is full. If we have already queued enough @@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fb63f4aeb7..3c9fa618b69d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, return tcp_v6_send_synack(sk, req, rvp); } -static inline void syn_flood_warning(struct sk_buff *skb) -{ -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); - else -#endif - printk(KERN_INFO - "TCPv6: Possible SYN flooding on port %d. " - "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); -} - static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree_skb(inet6_rsk(req)->pktopts); @@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; -#ifdef CONFIG_SYN_COOKIES int want_cookie = 0; -#else -#define want_cookie 0 -#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; if (inet_csk_reqsk_queue_is_full(sk) && !isn) { - if (net_ratelimit()) - syn_flood_warning(skb); -#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) - want_cookie = 1; - else -#endif - goto drop; + want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6"); + if (!want_cookie) + goto drop; } if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) @@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) while (l-- > 0) *c++ ^= *hash_location++; -#ifdef CONFIG_SYN_COOKIES want_cookie = 0; /* not our kind of cookie */ -#endif tmp_ext.cookie_out_never = 0; /* false */ tmp_ext.cookie_plus = tmp_opt.cookie_plus; } else if (!tp->rx_opt.cookie_in_always) { -- cgit v1.2.3 From d9e64f83ebb8f563810b10536b23516d3bd30e80 Mon Sep 17 00:00:00 2001 From: "rajan.aggarwal85@gmail.com" Date: Tue, 30 Aug 2011 23:57:38 +0000 Subject: net/can/af_can.c: Change del_timer to del_timer_sync This is important for SMP platform to check if timer function is executing on other CPU with deleting the timer. Signed-off-by: Rajan Aggarwal Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 8ce926d3b2cb..9b0c32a2690c 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -857,7 +857,7 @@ static __exit void can_exit(void) struct net_device *dev; if (stats_timer) - del_timer(&can_stattimer); + del_timer_sync(&can_stattimer); can_remove_proc(); -- cgit v1.2.3 From 0542b69e2c57fc9668ce6a03155bea6e1f557901 Mon Sep 17 00:00:00 2001 From: dpward Date: Wed, 31 Aug 2011 06:05:27 +0000 Subject: net: Make flow cache namespace-aware flow_cache_lookup will return a cached object (or null pointer) that the resolver (i.e. xfrm_policy_lookup) previously found for another namespace using the same key/family/dir. Instead, make the namespace part of what identifies entries in the cache. As before, flow_entry_valid will return 0 for entries where the namespace has been deleted, and they will be removed from the cache the next time flow_cache_gc_task is run. Reported-by: Andrew Dickinson Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/core/flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33cad3b..47b6d26c2afb 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -232,7 +233,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, hash = flow_hash_code(fc, fcp, key); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && flow_key_compare(key, &tfle->key) == 0) { fle = tfle; @@ -246,6 +248,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; memcpy(&fle->key, key, sizeof(*key)); -- cgit v1.2.3 From 48c830120f2a20b44220aa26feda9ed15f49eaab Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 31 Aug 2011 08:03:29 +0000 Subject: net: copy userspace buffers on device forwarding dev_forward_skb loops an skb back into host networking stack which might hang on the memory indefinitely. In particular, this can happen in macvtap in bridged mode. Copy the userspace fragments to avoid blocking the sender in that case. As this patch makes skb_copy_ubufs extern now, I also added some documentation and made it clear the SKBTX_DEV_ZEROCOPY flag automatically instead of doing it in all callers. This can be made into a separate patch if people feel it's worth it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/dev.c | 8 ++++++++ net/core/skbuff.c | 22 +++++++++++++++++----- 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b996ed86d5b..8bd383caa363 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579beb..b10ff0a71855 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27002dffe7ed..387703f56fce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. + */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); -- cgit v1.2.3 From 31dda0ae933bb9fea9cfe000b698c41af0417cac Mon Sep 17 00:00:00 2001 From: nhorman Date: Wed, 14 Sep 2011 03:05:02 +0000 Subject: net: don't clear IFF_XMIT_DST_RELEASE in ether_setup d88733150 introduced the IFF_SKB_TX_SHARING flag, which I unilaterally set in ether_setup. In doing this I didn't realize that other flags (such as IFF_XMIT_DST_RELEASE) might be set prior to calling the ether_setup routine. This patch changes ether_setup to or in SKB_TX_SHARING so as not to inadvertently clear other existing flags. Thanks to Pekka Riikonen for pointing out my error Signed-off-by: Neil Horman Reported-by: Pekka Riikonen CC: "David S. Miller" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ethernet/eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 27997d35ebd3..a2468363978e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - dev->priv_flags = IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); -- cgit v1.2.3 From c0d5f9db1c7d1b8a9e2f217706e8ea233bac2754 Mon Sep 17 00:00:00 2001 From: Jim Schutt Date: Fri, 16 Sep 2011 08:27:31 -0600 Subject: libceph: initialize ack_stamp to avoid unnecessary connection reset Commit 4cf9d544631c recorded when an outgoing ceph message was ACKed, in order to avoid unnecessary connection resets when an OSD is busy. However, ack_stamp is uninitialized, so there is a window between when the message is sent and when it is ACKed in which handle_timeout() interprets the unitialized value as an expired timeout, and resets the connection unnecessarily. Close the window by initializing ack_stamp. Signed-off-by: Jim Schutt Signed-off-by: Sage Weil --- net/ceph/messenger.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index c340e2e0765b..9918e9eb276e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; + m->ack_stamp = 0; m->pool = NULL; /* middle */ -- cgit v1.2.3 From 1cad78932a0d139dceff78e68808e160a224d57a Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Mon, 12 Sep 2011 14:51:53 -0700 Subject: libceph: fix parse options memory leak ceph_destroy_options does not free opt->mon_addr that is allocated in ceph_parse_options. Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 132963abc266..2883ea01e680 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt) ceph_crypto_key_destroy(opt->key); kfree(opt->key); } + kfree(opt->mon_addr); kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); -- cgit v1.2.3 From 935b639a049053d0ccbcf7422f2f9cd221642f58 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 16 Sep 2011 11:13:17 -0700 Subject: libceph: fix linger request requeuing The r_req_lru_item list node moves between several lists, and that cycle is not directly related (and does not begin) with __register_request(). Initialize it in the request constructor, not __register_request(). This fixes later badness (below) when OSDs restart underneath an rbd mount. Crashes we've seen due to this include: [ 213.974288] kernel BUG at net/ceph/messenger.c:2193! and [ 144.035274] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 144.035278] IP: [] con_work+0x1463/0x2ce0 [libceph] Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 16836a7df7a6..88ad8a2501b5 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); + INIT_LIST_HEAD(&req->r_req_lru_item); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -816,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc, { req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); - INIT_LIST_HEAD(&req->r_req_lru_item); - dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; - if (osdc->num_requests == 1) { dout(" first request, scheduling timeout\n"); __schedule_osd_timeout(osdc); -- cgit v1.2.3 From aa3d7eef398dd4f29045e9889b817d5161afe03e Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 Sep 2011 14:28:17 +0530 Subject: wireless: Reset beacon_found while updating regulatory During the association, the regulatory is updated by country IE that reaps the previously found beacons. The impact is that after a STA disconnects *or* when for any reason a regulatory domain change happens the beacon hint flag is not cleared therefore preventing future beacon hints to be learned. This is important as a regulatory domain change or a restore of regulatory settings would set back the passive scan and no-ibss flags on the channel. This is the right place to do this given that it covers any regulatory domain change. Cc: stable@kernel.org Reviewed-by: Luis R. Rodriguez Signed-off-by: Rajkumar Manoharan Acked-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 02751dbc5a97..68a471ba193f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -852,6 +852,7 @@ static void handle_channel(struct wiphy *wiphy, return; } + chan->beacon_found = false; chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); -- cgit v1.2.3 From 3965ac00204e0ccd89e1e73ead4d2098dc8f7bd1 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Thu, 15 Sep 2011 15:12:29 +0530 Subject: wireless: Fix rate mask for scan request The scan request received from cfg80211_connect do not have proper rate mast. So the probe request sent on each channel do not have proper the supported rates ie. Cc: stable@kernel.org Reviewed-by: Johannes Berg Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index b7b6ff8be553..dec0fa28372e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -118,6 +118,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) i++, j++) request->channels[i] = &wdev->wiphy->bands[band]->channels[j]; + request->rates[band] = + (1 << wdev->wiphy->bands[band]->n_bitrates) - 1; } } request->n_channels = n_channels; -- cgit v1.2.3 From d5ccd496601b8776a516d167a6485754575dc38f Mon Sep 17 00:00:00 2001 From: Max Matveev Date: Mon, 29 Aug 2011 21:02:24 +0000 Subject: sctp: deal with multiple COOKIE_ECHO chunks Attempt to reduce the number of IP packets emitted in response to single SCTP packet (2e3216cd) introduced a complication - if a packet contains two COOKIE_ECHO chunks and nothing else then SCTP state machine corks the socket while processing first COOKIE_ECHO and then loses the association and forgets to uncork the socket. To deal with the issue add new SCTP command which can be used to set association explictly. Use this new command when processing second COOKIE_ECHO chunk to restore the context for SCTP state machine. Signed-off-by: Max Matveev Signed-off-by: David S. Miller --- include/net/sctp/command.h | 1 + net/sctp/sm_sideeffect.c | 5 +++++ net/sctp/sm_statefuns.c | 6 ++++++ 3 files changed, 12 insertions(+) (limited to 'net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6506458ccd33..712b3bebeda7 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -109,6 +109,7 @@ typedef enum { SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ + SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST } sctp_verb_t; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 167c880cf8da..76388b083f28 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; + + case SCTP_CMD_SET_ASOC: + asoc = cmd->obj.asoc; + break; + default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 49b847b00f99..a0f31e6c1c63 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); + /* Restore association pointer to provide SCTP command interpeter + * with a valid context in case it needs to manipulate + * the queues */ + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, + SCTP_ASOC((struct sctp_association *)asoc)); + return retval; nomem: -- cgit v1.2.3 From 4fb66b8210c7d7147b164e19b1b44da916a75691 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Sep 2011 02:19:23 +0000 Subject: caif: fix a potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bd30ce4bc0b7 (caif: Use RCU instead of spin-lock in caif_dev.c) added a potential NULL dereference in case alloc_percpu() fails. caif_device_alloc() can also use GFP_KERNEL instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet CC: Sjur Brændeland Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 7c2fa0a08148..7f9ac0742d19 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev) caifdevs = caif_device_list(dev_net(dev)); BUG_ON(!caifdevs); - caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); + caifd = kzalloc(sizeof(*caifd), GFP_KERNEL); if (!caifd) return NULL; caifd->pcpu_refcnt = alloc_percpu(int); + if (!caifd->pcpu_refcnt) { + kfree(caifd); + return NULL; + } caifd->netdev = dev; dev_hold(dev); return caifd; -- cgit v1.2.3 From 19c1ea14c930db5e9c0cd7c3c6f4d01457dfcd69 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 4 Sep 2011 20:24:20 +0000 Subject: ipv4: Fix fib_info->fib_metrics leak Commit 4670994d(net,rcu: convert call_rcu(fc_rport_free_rcu) to kfree_rcu()) introduced a memory leak. This patch reverts it. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 33e2c35b74b7..80106d89d548 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }; /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); + kfree(fi); +} void free_fib_info(struct fib_info *fi) { @@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree_rcu(fi, rcu); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) -- cgit v1.2.3 From aa1c366e4febc7f5c2b84958a2dd7cd70e28f9d0 Mon Sep 17 00:00:00 2001 From: dpward Date: Mon, 5 Sep 2011 16:47:24 +0000 Subject: net: Handle different key sizes between address families in flow cache With the conversion of struct flowi to a union of AF-specific structs, some operations on the flow cache need to account for the exact size of the key. Signed-off-by: David Ward Signed-off-by: David S. Miller --- include/net/flow.h | 19 +++++++++++++++++++ net/core/flow.c | 31 +++++++++++++++++-------------- 2 files changed, 36 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/net/flow.h b/include/net/flow.h index 2ec377d9ab9f..a09447749e2d 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -7,6 +7,7 @@ #ifndef _NET_FLOW_H #define _NET_FLOW_H +#include #include #include @@ -161,6 +162,24 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) return container_of(fldn, struct flowi, u.dn); } +typedef unsigned long flow_compare_t; + +static inline size_t flow_key_size(u16 family) +{ + switch (family) { + case AF_INET: + BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t)); + return sizeof(struct flowi4) / sizeof(flow_compare_t); + case AF_INET6: + BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t)); + return sizeof(struct flowi6) / sizeof(flow_compare_t); + case AF_DECnet: + BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t)); + return sizeof(struct flowidn) / sizeof(flow_compare_t); + } + return 0; +} + #define FLOW_DIR_IN 0 #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 diff --git a/net/core/flow.c b/net/core/flow.c index 47b6d26c2afb..555a456efb07 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -173,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -216,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -223,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -231,12 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -251,7 +254,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; -- cgit v1.2.3 From 9566042ef84fd2a282d00d3163074ec9b3f93a70 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 16 Sep 2011 09:09:50 +0000 Subject: IRDA: Fix global type conflicts in net/irda/irsysctl.c v2 The externs here didn't agree with the declarations in qos.c. Better would be probably to move this into a header, but since it's common practice to have naked externs with sysctls I left it for now. Cc: samuel@sortiz.org Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- net/irda/irsysctl.c | 6 +++--- net/irda/qos.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index d0b70dadf73b..2615ffc8e785 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -40,9 +40,9 @@ extern int sysctl_slot_timeout; extern int sysctl_fast_poll_increase; extern char sysctl_devname[]; extern int sysctl_max_baud_rate; -extern int sysctl_min_tx_turn_time; -extern int sysctl_max_tx_data_size; -extern int sysctl_max_tx_window; +extern unsigned int sysctl_min_tx_turn_time; +extern unsigned int sysctl_max_tx_data_size; +extern unsigned int sysctl_max_tx_window; extern int sysctl_max_noreply_time; extern int sysctl_warn_noreply_time; extern int sysctl_lap_keepalive_time; diff --git a/net/irda/qos.c b/net/irda/qos.c index 1b51bcf42394..4369f7f41bcb 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12; * Default is 10us which means using the unmodified value given by the * peer except if it's 0 (0 is likely a bug in the other stack). */ -unsigned sysctl_min_tx_turn_time = 10; +unsigned int sysctl_min_tx_turn_time = 10; /* * Maximum data size to be used in transmission in payload of LAP frame. * There is a bit of confusion in the IrDA spec : @@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10; * bytes frames or all negotiated frame sizes, but you can use the sysctl * to play with this value anyway. * Jean II */ -unsigned sysctl_max_tx_data_size = 2042; +unsigned int sysctl_max_tx_data_size = 2042; /* * Maximum transmit window, i.e. number of LAP frames between turn-around. * This allow to override what the peer told us. Some peers are buggy and * don't always support what they tell us. * Jean II */ -unsigned sysctl_max_tx_window = 7; +unsigned int sysctl_max_tx_window = 7; static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, -- cgit v1.2.3 From 8e2ec639173f325977818c45011ee176ef2b11f6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 5 Sep 2011 21:34:30 +0000 Subject: ipv6: don't use inetpeer to store metrics for routes. Current IPv6 implementation uses inetpeer to store metrics for routes. The problem of inetpeer is that it doesn't take subnet prefix length in to consideration. If two routes have the same address but different prefix length, they share same inetpeer. So changing metrics of one route also affects the other. The fix is to allocate separate metrics storage for each route. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv6/route.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9e69eb0ec6dd..1250f9020670 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; + if (!(rt->dst.flags & DST_HOST)) + return NULL; + if (!rt->rt6i_peer) rt6_bind_peer(rt, 1); @@ -252,6 +255,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct inet_peer *peer = rt->rt6i_peer; + if (!(rt->dst.flags & DST_HOST)) + dst_destroy_metrics_generic(dst); + if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -723,9 +729,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, ipv6_addr_copy(&rt->rt6i_gateway, daddr); } - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -775,9 +779,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct rt6_info *rt = ip6_rt_copy(ort, daddr); if (rt) { - rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->dst.flags |= DST_HOST; dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; @@ -1078,12 +1080,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, neigh = NULL; } - rt->rt6i_idev = idev; + rt->dst.flags |= DST_HOST; + rt->dst.output = ip6_output; dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - rt->dst.output = ip6_output; + + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); + rt->rt6i_dst.plen = 128; + rt->rt6i_idev = idev; spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; @@ -1261,6 +1266,14 @@ int ip6_route_add(struct fib6_config *cfg) if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; + if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { + u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!metrics) { + err = -ENOMEM; + goto out; + } + dst_init_metrics(&rt->dst, metrics, 0); + } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1607,9 +1620,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_dst.plen = 128; - nrt->dst.flags |= DST_HOST; - ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); @@ -1754,9 +1764,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, if (rt) { rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; + rt->dst.flags |= DST_HOST; ipv6_addr_copy(&rt->rt6i_dst.addr, dest); - rt->rt6i_dst.plen = ort->rt6i_dst.plen; + rt->rt6i_dst.plen = 128; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; -- cgit v1.2.3 From f779b2d60ab95c17f1e025778ed0df3ec2f05d75 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Sun, 18 Sep 2011 22:37:34 -0400 Subject: tcp: fix validation of D-SACK D-SACK is allowed to reside below snd_una. But the corresponding check in tcp_is_sackblock_valid() is the exact opposite. It looks like a typo. Signed-off-by: Zheng Yan Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ea0d2183df4b..21fab3edb92c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1124,7 +1124,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, return 0; /* ...Then it's D-SACK, and must reside below snd_una completely */ - if (!after(end_seq, tp->snd_una)) + if (after(end_seq, tp->snd_una)) return 0; if (!before(start_seq, tp->undo_marker)) -- cgit v1.2.3 From 8603e33d01cb6bd32de46b2596fe47f0c4df6c12 Mon Sep 17 00:00:00 2001 From: Roy Li Date: Tue, 20 Sep 2011 15:10:16 -0400 Subject: ipv6: fix a possible double free When calling snmp6_alloc_dev fails, the snmp6 relevant memory are freed by snmp6_alloc_dev. Calling in6_dev_finish_destroy will free these memory twice. Double free will lead that undefined behavior occurs. Signed-off-by: Roy Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f012ebd87b43..12368c586068 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -374,8 +374,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s(): cannot allocate memory for statistics; dev=%s.\n", __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); - ndev->dead = 1; - in6_dev_finish_destroy(ndev); + dev_put(dev); + kfree(ndev); return NULL; } -- cgit v1.2.3 From 561dac2d410ffac0b57a23b85ae0a623c1a076ca Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 11 Sep 2011 15:36:05 +0000 Subject: fib:fix BUG_ON in fib_nl_newrule when add new fib rule add new fib rule can cause BUG_ON happen the reproduce shell is ip rule add pref 38 ip rule add pref 38 ip rule add to 192.168.3.0/24 goto 38 ip rule del pref 38 ip rule add to 192.168.3.0/24 goto 38 ip rule add pref 38 then the BUG_ON will happen del BUG_ON and use (ctarget == NULL) identify whether this rule is unresolved Signed-off-by: Gao feng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/fib_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b5..3231b468bb72 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; -- cgit v1.2.3 From bcf66bf54aabffc150acd1c99e0f4bc51935eada Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 20 Sep 2011 23:38:58 +0000 Subject: xfrm: Perform a replay check after return from async codepaths When asyncronous crypto algorithms are used, there might be many packets that passed the xfrm replay check, but the replay advance function is not called yet for these packets. So the replay check function would accept a replay of all of these packets. Also the system might crash if there are more packets in async processing than the size of the anti replay window, because the replay advance function would try to update the replay window beyond the bounds. This pach adds a second replay check after resuming from the async processing to fix these issues. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a026b0ef2443..54a0dc2e2f8d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,6 +212,11 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; + if (async && x->repl->check(x, skb, seq)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); + goto drop_unlock; + } + x->repl->advance(x, seq); x->curlft.bytes += skb->len; -- cgit v1.2.3 From 1b9ca0272ffae212e726380f66777b30a56ed7a5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 21 Sep 2011 16:13:07 +0300 Subject: cfg80211: Fix validation of AKM suites Incorrect variable was used in validating the akm_suites array from NL80211_ATTR_AKM_SUITES. In addition, there was no explicit validation of the array length (we only have room for NL80211_MAX_NR_AKM_SUITES). This can result in a buffer write overflow for stack variables with arbitrary data from user space. The nl80211 commands using the affected functionality require GENL_ADMIN_PERM, so this is only exposed to admin users. Cc: stable@kernel.org Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e83e7fee3bc0..ea40d540a990 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4113,9 +4113,12 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, if (len % sizeof(u32)) return -EINVAL; + if (settings->n_akm_suites > NL80211_MAX_NR_AKM_SUITES) + return -EINVAL; + memcpy(settings->akm_suites, data, len); - for (i = 0; i < settings->n_ciphers_pairwise; i++) + for (i = 0; i < settings->n_akm_suites; i++) if (!nl80211_valid_akm_suite(settings->akm_suites[i])) return -EINVAL; } -- cgit v1.2.3 From 8b267b312df9343fea3bd679c509b36214b5a854 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 21 Sep 2011 16:06:42 +0200 Subject: batman-adv: do_bcast has to be true for broadcast packets only corrects a critical bug of the GW feature. This bug made all the unicast packets destined to a GW to be sent as broadcast. This bug is present even if the sender GW feature is configured as OFF. It's an urgent bug fix and should be committed as soon as possible. This was a regression introduced by 43676ab590c3f8686fd047d34c3e33803eef71f0 Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 3e2f91ffa4e2..05dd35114a27 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -565,7 +565,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct orig_node *orig_node = NULL; int data_len = skb->len, ret; short vid = -1; - bool do_bcast = false; + bool do_bcast; if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto dropped; @@ -598,15 +598,15 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) tt_local_add(soft_iface, ethhdr->h_source); orig_node = transtable_search(bat_priv, ethhdr->h_dest); - if (is_multicast_ether_addr(ethhdr->h_dest) || - (orig_node && orig_node->gw_flags)) { + do_bcast = is_multicast_ether_addr(ethhdr->h_dest); + if (do_bcast || (orig_node && orig_node->gw_flags)) { ret = gw_is_target(bat_priv, skb, orig_node); if (ret < 0) goto dropped; - if (ret == 0) - do_bcast = true; + if (ret) + do_bcast = false; } /* ethernet packet should be broadcasted */ -- cgit v1.2.3 From 2015de5fe2a47086a3260802275932bfd810884e Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 27 Sep 2011 15:16:08 -0400 Subject: ipv6-multicast: Fix memory leak in input path. Have to free the skb before returning if we fail the fib lookup. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 705c82886281..825d02fa6586 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2052,8 +2052,10 @@ int ip6_mr_input(struct sk_buff *skb) int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, -- cgit v1.2.3 From d4cae56219755ccf8acfc8e2c1927009ff29d8c6 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 26 Sep 2011 07:04:36 +0000 Subject: net: check return value for dst_alloc return value of dst_alloc must be checked before use Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94fdcc7f1030..552df27dcf53 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) BUG(); } xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); - memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry)); - xfrm_policy_put_afinfo(afinfo); - if (likely(xdst)) + if (likely(xdst)) { + memset(&xdst->u.rt6.rt6i_table, 0, + sizeof(*xdst) - sizeof(struct dst_entry)); xdst->flo.ops = &xfrm_bundle_fc_ops; - else + } else xdst = ERR_PTR(-ENOBUFS); + xfrm_policy_put_afinfo(afinfo); + return xdst; } -- cgit v1.2.3 From fbe58186901155c0cb5398dd343337be0c456c04 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 26 Sep 2011 07:04:56 +0000 Subject: ipv6: check return value for dst_alloc return value of dst_alloc must be checked before use Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1250f9020670..fb545edef6ea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -244,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, { struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); - memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + if (rt != NULL) + memset(&rt->rt6i_table, 0, + sizeof(*rt) - sizeof(struct dst_entry)); return rt; } -- cgit v1.2.3 From 67928c4041606f02725f3c95c4c0404e4532df1b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 23 Sep 2011 13:11:01 +0000 Subject: ipv6-multicast: Fix memory leak in IPv6 multicast. If reg_vif_xmit cannot find a routing entry, be sure to free the skb before returning the error. Signed-off-by: Ben Greear Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 825d02fa6586..def0538e2413 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, int err; err = ip6mr_fib_lookup(net, &fl6, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; -- cgit v1.2.3 From 782e182e91e97f529a1edb30fdece9f1bef90ecc Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Sep 2011 10:08:27 -0700 Subject: libceph: fix pg_temp mapping calculation We need to apply the modulo pg_num calculation before looking up a pgid in the pg_temp mapping rbtree. This fixes pg_temp mappings, and fixes (some) misdirected requests that result in messages like [WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11 on the server and stall make the client block without getting a reply (at least until the pg_temp mapping goes way, but that can take a long long time). Reorder calc_pg_raw() a bit to make more sense. Signed-off-by: Sage Weil --- net/ceph/osdmap.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index e97c3588c3ec..eceb8d56703c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1046,10 +1046,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned poolid, ps, pps; + unsigned poolid, ps, pps, t; int preferred; + poolid = le32_to_cpu(pgid.pool); + ps = le16_to_cpu(pgid.ps); + preferred = (s16)le16_to_cpu(pgid.preferred); + + pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + if (!pool) + return NULL; + /* pg_temp? */ + if (preferred >= 0) + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num), + pool->lpgp_num_mask); + else + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), + pool->pgp_num_mask); + pgid.ps = cpu_to_le16(t); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1057,18 +1072,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); - preferred = (s16)le16_to_cpu(pgid.preferred); - - /* don't forcefeed bad device ids to crush */ - if (preferred >= osdmap->max_osd || - preferred >= osdmap->crush->max_devices) - preferred = -1; - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); - if (!pool) - return NULL; ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, pool->v.type, pool->v.size); if (ruleno < 0) { @@ -1078,6 +1081,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, return NULL; } + /* don't forcefeed bad device ids to crush */ + if (preferred >= osdmap->max_osd || + preferred >= osdmap->crush->max_devices) + preferred = -1; + if (preferred >= 0) pps = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpgp_num), -- cgit v1.2.3 From 8adc8b3d780363d5df0dd6ace10336e3d7e331a1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Sep 2011 10:11:04 -0700 Subject: libceph: fix pg_temp mapping update The incremental map updates have a record for each pg_temp mapping that is to be add/updated (len > 0) or removed (len == 0). The old code was written as if the updates were a complete enumeration; that was just wrong. Update the code to remove 0-length entries and drop the rbtree traversal. This avoids misdirected (and hung) requests that manifest as server errors like [WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11 Signed-off-by: Sage Weil --- net/ceph/osdmap.c | 50 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index eceb8d56703c..fd863fe76934 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, struct ceph_pg_mapping *pg = NULL; int c; + dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new); while (*p) { parent = *p; pg = rb_entry(parent, struct ceph_pg_mapping, node); @@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, while (n) { pg = rb_entry(n, struct ceph_pg_mapping, node); c = pgid_cmp(pgid, pg->pgid); - if (c < 0) + if (c < 0) { n = n->rb_left; - else if (c > 0) + } else if (c > 0) { n = n->rb_right; - else + } else { + dout("__lookup_pg_mapping %llx got %p\n", + *(u64 *)&pgid, pg); return pg; + } } return NULL; } +static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) +{ + struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); + + if (pg) { + dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + rb_erase(&pg->node, root); + kfree(pg); + return 0; + } + dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + return -ENOENT; +} + /* * rbtree of pg pool info */ @@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, void *start = *p; int err = -EINVAL; u16 version; - struct rb_node *rbp; ceph_decode_16_safe(p, end, version, bad); if (version > CEPH_OSDMAP_INC_VERSION) { @@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } /* new_pg_temp */ - rbp = rb_first(&map->pg_temp); ceph_decode_32_safe(p, end, len, bad); while (len--) { struct ceph_pg_mapping *pg; @@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ceph_decode_copy(p, &pgid, sizeof(pgid)); pglen = ceph_decode_32(p); - /* remove any? */ - while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, - node)->pgid, pgid) <= 0) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } - if (pglen) { /* insert */ ceph_decode_need(p, end, pglen*sizeof(u32), bad); @@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, pglen); + } else { + /* remove */ + __remove_pg_mapping(&map->pg_temp, pgid); } } - while (rbp) { - struct ceph_pg_mapping *cur = - rb_entry(rbp, struct ceph_pg_mapping, node); - - rbp = rb_next(rbp); - dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); - rb_erase(&cur->node, &map->pg_temp); - kfree(cur); - } /* ignore the rest */ *p = end; -- cgit v1.2.3 From aabdcb0b553b9c9547b1a506b34d55a764745870 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 23 Sep 2011 08:23:47 +0000 Subject: can bcm: fix tx_setup off-by-one errors This patch fixes two off-by-one errors that canceled each other out. Checking for the same condition two times in bcm_tx_timeout_tsklet() reduced the count of frames to be sent by one. This did not show up the first time tx_setup is invoked as an additional frame is sent due to TX_ANNONCE. Invoking a second tx_setup on the same item led to a reduced (by 1) number of sent frames. Reported-by: Andre Naujoks Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index d6c8ae5b2e6a..c9cdb8d78e70 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -365,9 +365,6 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - } - - if (op->kt_ival1.tv64 && (op->count > 0)) { /* send (next) frame */ bcm_can_tx(op); @@ -970,8 +967,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - if (op->kt_ival1.tv64 && (op->count > 0)) { - /* op->count-- is done in bcm_tx_timeout_handler */ + /* only start timer when having more frames than sent below */ + if (op->kt_ival1.tv64 && (op->count > 1)) { + /* op->count-- is done in bcm_tx_timeout_tsklet */ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } else @@ -979,8 +977,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, HRTIMER_MODE_REL); } - if (op->flags & TX_ANNOUNCE) + if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); + if (op->kt_ival1.tv64 && (op->count > 0)) + op->count--; + } return msg_head->nframes * CFSIZ + MHSIZ; } -- cgit v1.2.3 From 676a1184e8afd4fed7948232df1ff91517400859 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 25 Sep 2011 02:21:30 +0000 Subject: ipv6: nullify ipv6_ac_list and ipv6_fl_list when creating new socket ipv6_ac_list and ipv6_fl_list from listening socket are inadvertently shared with new socket created for connection. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3c9fa618b69d..79cc6469508d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1383,6 +1383,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif + newnp->ipv6_ac_list = NULL; + newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); @@ -1447,6 +1449,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, First: no IPv4 options. */ newinet->inet_opt = NULL; + newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ -- cgit v1.2.3 From 85a64889492b45f931ddac87ec09d84aa7347ee1 Mon Sep 17 00:00:00 2001 From: Jonathan Lallinger Date: Thu, 29 Sep 2011 07:58:41 +0000 Subject: RDSRDMA: Fix cleanup of rds_iw_mr_pool In the rds_iw_mr_pool struct the free_pinned field keeps track of memory pinned by free MRs. While this field is incremented properly upon allocation, it is never decremented upon unmapping. This would cause the rds_rdma module to crash the kernel upon unloading, by triggering the BUG_ON in the rds_iw_destroy_mr_pool function. This change keeps track of the MRs that become unpinned, so that free_pinned can be decremented appropriately. Signed-off-by: Jonathan Lallinger Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- net/rds/iw_rdma.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 8b77edbab272..4e1de171866c 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list); + struct list_head *kill_list, + int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) @@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; - unsigned int nfreed = 0, ncleaned = 0, free_goal; + unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal; int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) * will be destroyed by the unmap function. */ if (!list_empty(&unmap_list)) { - ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, + &kill_list, &unpinned); /* If we've been asked to destroy all MRs, move those * that were simply cleaned to the kill list */ if (free_all) @@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) spin_unlock_irqrestore(&pool->list_lock, flags); } + atomic_sub(unpinned, &pool->free_pinned); atomic_sub(ncleaned, &pool->dirty_count); atomic_sub(nfreed, &pool->item_count); @@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, struct list_head *unmap_list, - struct list_head *kill_list) + struct list_head *kill_list, + int *unpinned) { struct rds_iw_mapping *mapping, *next; unsigned int ncleaned = 0; @@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, spin_lock_irqsave(&pool->list_lock, flags); list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + *unpinned += mapping->m_sg.len; list_move(&mapping->m_list, &laundered); ncleaned++; } -- cgit v1.2.3 From 12d0d0d3a7349daa95dbfd5d7df8146255bc7c67 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 29 Sep 2011 15:33:47 -0400 Subject: can bcm: fix incomplete tx_setup fix The commit aabdcb0b553b9c9547b1a506b34d55a764745870 ("can bcm: fix tx_setup off-by-one errors") fixed only a part of the original problem reported by Andre Naujoks. It turned out that the original code needed to be re-ordered to reduce complexity and to finally fix the reported frame counting issues. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index c9cdb8d78e70..c84963d2dee6 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_start_timer(struct bcm_op *op) +{ + if (op->kt_ival1.tv64 && op->count) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); + else if (op->kt_ival2.tv64) + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); +} + static void bcm_tx_timeout_tsklet(unsigned long data) { struct bcm_op *op = (struct bcm_op *)data; @@ -365,23 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) bcm_send_to_user(op, &msg_head, NULL, 0); } - - /* send (next) frame */ bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); - } else { - if (op->kt_ival2.tv64) { + } else if (op->kt_ival2.tv64) + bcm_can_tx(op); - /* send (next) frame */ - bcm_can_tx(op); - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); - } - } + bcm_tx_start_timer(op); } /* @@ -961,28 +962,21 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_cancel(&op->timer); } - if ((op->flags & STARTTIMER) && - ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { - + if (op->flags & STARTTIMER) { + hrtimer_cancel(&op->timer); /* spec: send can_frame when starting timer */ op->flags |= TX_ANNOUNCE; - - /* only start timer when having more frames than sent below */ - if (op->kt_ival1.tv64 && (op->count > 1)) { - /* op->count-- is done in bcm_tx_timeout_tsklet */ - hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); - } else - hrtimer_start(&op->timer, op->kt_ival2, - HRTIMER_MODE_REL); } if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); - if (op->kt_ival1.tv64 && (op->count > 0)) + if (op->count) op->count--; } + if (op->flags & STARTTIMER) + bcm_tx_start_timer(op); + return msg_head->nframes * CFSIZ + MHSIZ; } -- cgit v1.2.3 From 7091fbd82cd5686444ffe9935ed6a8190101fe9d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Sep 2011 10:38:28 +0000 Subject: make PACKET_STATISTICS getsockopt report consistently between ring and non-ring This is a minor change. Up until kernel 2.6.32, getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, ...) would return total and dropped packets since its last invocation. The introduction of socket queue overflow reporting [1] changed drop rate calculation in the normal packet socket path, but not when using a packet ring. As a result, the getsockopt now returns different statistics depending on the reception method used. With a ring, it still returns the count since the last call, as counts are incremented in tpacket_rcv and reset in getsockopt. Without a ring, it returns 0 if no drops occurred since the last getsockopt and the total drops over the lifespan of the socket otherwise. The culprit is this line in packet_rcv, executed on a drop: drop_n_acct: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); As it shows, the new drop number it taken from the socket drop counter, which is not reset at getsockopt. I put together a small example that demonstrates the issue [2]. It runs for 10 seconds and overflows the queue/ring on every odd second. The reported drop rates are: ring: 16, 0, 16, 0, 16, ... non-ring: 0, 15, 0, 30, 0, 46, 0, 60, 0 , 74. Note how the even ring counts monotonically increase. Because the getsockopt adds tp_drops to tp_packets, total counts are similarly reported cumulatively. Long story short, reinstating the original code, as the below patch does, fixes the issue at the cost of additional per-packet cycles. Another solution that does not introduce per-packet overhead is be to keep the current data path, record the value of sk_drops at getsockopt() at call N in a new field in struct packetsock and subtract that when reporting at call N+1. I'll be happy to code that, instead, it's just more messy. [1] http://patchwork.ozlabs.org/patch/35665/ [2] http://kernel.googlecode.com/files/test-packetsock-getstatistics.c Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c698cec0a445..fabb4fafa281 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -961,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); + spin_lock(&sk->sk_receive_queue.lock); + po->stats.tp_drops++; + atomic_inc(&sk->sk_drops); + spin_unlock(&sk->sk_receive_queue.lock); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { -- cgit v1.2.3