From 77238f2b942b38ab4e7f3aced44084493e4a8675 Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Sun, 18 Oct 2009 23:17:37 -0700 Subject: AF_UNIX: Fix deadlock on connecting to shutdown socket I found a deadlock bug in UNIX domain socket, which makes able to DoS attack against the local machine by non-root users. How to reproduce: 1. Make a listening AF_UNIX/SOCK_STREAM socket with an abstruct namespace(*), and shutdown(2) it. 2. Repeat connect(2)ing to the listening socket from the other sockets until the connection backlog is full-filled. 3. connect(2) takes the CPU forever. If every core is taken, the system hangs. PoC code: (Run as many times as cores on SMP machines.) int main(void) { int ret; int csd; int lsd; struct sockaddr_un sun; /* make an abstruct name address (*) */ memset(&sun, 0, sizeof(sun)); sun.sun_family = PF_UNIX; sprintf(&sun.sun_path[1], "%d", getpid()); /* create the listening socket and shutdown */ lsd = socket(AF_UNIX, SOCK_STREAM, 0); bind(lsd, (struct sockaddr *)&sun, sizeof(sun)); listen(lsd, 1); shutdown(lsd, SHUT_RDWR); /* connect loop */ alarm(15); /* forcely exit the loop after 15 sec */ for (;;) { csd = socket(AF_UNIX, SOCK_STREAM, 0); ret = connect(csd, (struct sockaddr *)&sun, sizeof(sun)); if (-1 == ret) { perror("connect()"); break; } puts("Connection OK"); } return 0; } (*) Make sun_path[0] = 0 to use the abstruct namespace. If a file-based socket is used, the system doesn't deadlock because of context switches in the file system layer. Why this happens: Error checks between unix_socket_connect() and unix_wait_for_peer() are inconsistent. The former calls the latter to wait until the backlog is processed. Despite the latter returns without doing anything when the socket is shutdown, the former doesn't check the shutdown state and just retries calling the latter forever. Patch: The patch below adds shutdown check into unix_socket_connect(), so connect(2) to the shutdown socket will return -ECONREFUSED. Signed-off-by: Tomoki Sekiyama Signed-off-by: Masanori Yoshida Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51ab497115eb..fc820cd75453 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1074,6 +1074,8 @@ restart: err = -ECONNREFUSED; if (other->sk_state != TCP_LISTEN) goto out_unlock; + if (other->sk_shutdown & RCV_SHUTDOWN) + goto out_unlock; if (unix_recvq_full(other)) { err = -EAGAIN; -- cgit v1.2.3 From a1a2ad9151c26d92e5c733a33d52108f5d3a5b57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 19 Oct 2009 19:12:36 -0700 Subject: Revert "tcp: fix tcp_defer_accept to consider the timeout" This reverts commit 6d01a026b7d3009a418326bdcf313503a314f1ea. Julian Anastasov, Willy Tarreau and Eric Dumazet have come up with a more correct way to deal with this. Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e320afea07fc..624c3c9b3c2b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -644,7 +644,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - inet_csk(sk)->icsk_accept_queue.rskq_defer_accept--; inet_rsk(req)->acked = 1; return NULL; } -- cgit v1.2.3 From d1b99ba41d6c5aa1ed2fc634323449dd656899e9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 19 Oct 2009 10:01:56 +0000 Subject: tcp: accept socket after TCP_DEFER_ACCEPT period Willy Tarreau and many other folks in recent years were concerned what happens when the TCP_DEFER_ACCEPT period expires for clients which sent ACK packet. They prefer clients that actively resend ACK on our SYN-ACK retransmissions to be converted from open requests to sockets and queued to the listener for accepting after the deferring period is finished. Then application server can decide to wait longer for data or to properly terminate the connection with FIN if read() returns EAGAIN which is an indication for accepting after the deferring period. This change still can have side effects for applications that expect always to see data on the accepted socket. Others can be prepared to work in both modes (with or without TCP_DEFER_ACCEPT period) and their data processing can ignore the read=EAGAIN notification and to allocate resources for clients which proved to have no data to send during the deferring period. OTOH, servers that use TCP_DEFER_ACCEPT=1 as flag (not as a timeout) to wait for data will notice clients that didn't send data for 3 seconds but that still resend ACKs. Thanks to Willy Tarreau for the initial idea and to Eric Dumazet for the review and testing the change. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624c3c9b3c2b..4c03598ed924 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ + if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; return NULL; -- cgit v1.2.3 From 0c3d79bce48034018e840468ac5a642894a521a3 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 19 Oct 2009 10:03:58 +0000 Subject: tcp: reduce SYN-ACK retrans for TCP_DEFER_ACCEPT Change SYN-ACK retransmitting code for the TCP_DEFER_ACCEPT users to not retransmit SYN-ACKs during the deferring period if ACK from client was received. The goal is to reduce traffic during the deferring period. When the period is finished we continue with sending SYN-ACKs (at least one) but this time any traffic from client will change the request to established socket allowing application to terminate it properly. Also, do not drop acked request if sending of SYN-ACK fails. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4351ca2cf0b8..537731b3bcb3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries; EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +/* Decide when to expire the request and when to resend SYN-ACK */ +static inline void syn_ack_recalc(struct request_sock *req, const int thresh, + const int max_retries, + const u8 rskq_defer_accept, + int *expire, int *resend) +{ + if (!rskq_defer_accept) { + *expire = req->retrans >= thresh; + *resend = 1; + return; + } + *expire = req->retrans >= thresh && + (!inet_rsk(req)->acked || req->retrans >= max_retries); + /* + * Do not resend while waiting for data after ACK, + * start to resend on end of deferring period to give + * last chance for data or ACK to create established socket. + */ + *resend = !inet_rsk(req)->acked || + req->retrans >= rskq_defer_accept - 1; +} + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < thresh || - (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req)) { + int expire = 0, resend = 0; + + syn_ack_recalc(req, thresh, max_retries, + queue->rskq_defer_accept, + &expire, &resend); + if (!expire && + (!resend || + !req->rsk_ops->rtx_syn_ack(parent, req) || + inet_rsk(req)->acked)) { unsigned long timeo; if (req->retrans++ == 0) -- cgit v1.2.3 From b103cf34382f26ff48a87931b83f13b177b47c1a Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 19 Oct 2009 10:10:40 +0000 Subject: tcp: fix TCP_DEFER_ACCEPT retrans calculation Fix TCP_DEFER_ACCEPT conversion between seconds and retransmission to match the TCP SYN-ACK retransmission periods because the time is converted to such retransmissions. The old algorithm selects one more retransmission in some cases. Allow up to 255 retransmissions. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 55 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 64d0af675823..9b2756fbdf9b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk) EXPORT_SYMBOL(tcp_enter_memory_pressure); +/* Convert seconds to retransmits based on initial and max timeout */ +static u8 secs_to_retrans(int seconds, int timeout, int rto_max) +{ + u8 res = 0; + + if (seconds > 0) { + int period = timeout; + + res = 1; + while (seconds > period && res < 255) { + res++; + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return res; +} + +/* Convert retransmits to seconds based on initial and max timeout */ +static int retrans_to_secs(u8 retrans, int timeout, int rto_max) +{ + int period = 0; + + if (retrans > 0) { + period = timeout; + while (--retrans) { + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return period; +} + /* * Wait for a TCP event. * @@ -2163,16 +2200,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - icsk->icsk_accept_queue.rskq_defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of - * retransmits */ - while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && - val > ((TCP_TIMEOUT_INIT / HZ) << - icsk->icsk_accept_queue.rskq_defer_accept)) - icsk->icsk_accept_queue.rskq_defer_accept++; - icsk->icsk_accept_queue.rskq_defer_accept++; - } + /* Translate value in seconds to number of retransmits */ + icsk->icsk_accept_queue.rskq_defer_accept = + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: @@ -2353,8 +2384,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : - ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); + val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, + TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; -- cgit v1.2.3 From f74c77cb1124a11acf69c98d10c0fdc22f322664 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 18 Oct 2009 20:24:41 +0000 Subject: bluetooth: scheduling while atomic bug fix Due to driver core changes dev_set_drvdata will call kzalloc which should be in might_sleep context, but hci_conn_add will be called in atomic context Like dev_set_name move dev_set_drvdata to work queue function. oops as following: Oct 2 17:41:59 darkstar kernel: [ 438.001341] BUG: sleeping function called from invalid context at mm/slqb.c:1546 Oct 2 17:41:59 darkstar kernel: [ 438.001345] in_atomic(): 1, irqs_disabled(): 0, pid: 2133, name: sdptool Oct 2 17:41:59 darkstar kernel: [ 438.001348] 2 locks held by sdptool/2133: Oct 2 17:41:59 darkstar kernel: [ 438.001350] #0: (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.+.}, at: [] lock_sock+0xa/0xc [l2cap] Oct 2 17:41:59 darkstar kernel: [ 438.001360] #1: (&hdev->lock){+.-.+.}, at: [] l2cap_sock_connect+0x103/0x26b [l2cap] Oct 2 17:41:59 darkstar kernel: [ 438.001371] Pid: 2133, comm: sdptool Not tainted 2.6.31-mm1 #2 Oct 2 17:41:59 darkstar kernel: [ 438.001373] Call Trace: Oct 2 17:41:59 darkstar kernel: [ 438.001381] [] __might_sleep+0xde/0xe5 Oct 2 17:41:59 darkstar kernel: [ 438.001386] [] __kmalloc+0x4a/0x15a Oct 2 17:41:59 darkstar kernel: [ 438.001392] [] ? kzalloc+0xb/0xd Oct 2 17:41:59 darkstar kernel: [ 438.001396] [] kzalloc+0xb/0xd Oct 2 17:41:59 darkstar kernel: [ 438.001400] [] device_private_init+0x15/0x3d Oct 2 17:41:59 darkstar kernel: [ 438.001405] [] dev_set_drvdata+0x18/0x26 Oct 2 17:41:59 darkstar kernel: [ 438.001414] [] hci_conn_init_sysfs+0x40/0xd9 [bluetooth] Oct 2 17:41:59 darkstar kernel: [ 438.001422] [] ? hci_conn_add+0x128/0x186 [bluetooth] Oct 2 17:41:59 darkstar kernel: [ 438.001429] [] hci_conn_add+0x177/0x186 [bluetooth] Oct 2 17:41:59 darkstar kernel: [ 438.001437] [] hci_connect+0x3c/0xfb [bluetooth] Oct 2 17:41:59 darkstar kernel: [ 438.001442] [] l2cap_sock_connect+0x174/0x26b [l2cap] Oct 2 17:41:59 darkstar kernel: [ 438.001448] [] sys_connect+0x60/0x7a Oct 2 17:41:59 darkstar kernel: [ 438.001453] [] ? lock_release_non_nested+0x84/0x1de Oct 2 17:41:59 darkstar kernel: [ 438.001458] [] ? might_fault+0x47/0x81 Oct 2 17:41:59 darkstar kernel: [ 438.001462] [] ? might_fault+0x47/0x81 Oct 2 17:41:59 darkstar kernel: [ 438.001468] [] ? __copy_from_user_ll+0x11/0xce Oct 2 17:41:59 darkstar kernel: [ 438.001472] [] sys_socketcall+0x82/0x17b Oct 2 17:41:59 darkstar kernel: [ 438.001477] [] syscall_call+0x7/0xb Signed-off-by: Dave Young Signed-off-by: David S. Miller --- net/bluetooth/hci_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 7f939ce29801..2bc6f6a8de68 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -92,6 +92,8 @@ static void add_conn(struct work_struct *work) dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); + dev_set_drvdata(&conn->dev, conn); + if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); return; @@ -144,8 +146,6 @@ void hci_conn_init_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - dev_set_drvdata(&conn->dev, conn); - device_initialize(&conn->dev); INIT_WORK(&conn->work_add, add_conn); -- cgit v1.2.3 From 45054dc1bf2367ccb0e7c0486037907cd9395f8b Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 18 Oct 2009 20:28:30 +0000 Subject: bluetooth: static lock key fix When shutdown ppp connection, lockdep waring about non-static key will happen, it is caused by the lock is not initialized properly at that time. Fix with tuning the lock/skb_queue_head init order [ 94.339261] INFO: trying to register non-static key. [ 94.342509] the code is fine but needs lockdep annotation. [ 94.342509] turning off the locking correctness validator. [ 94.342509] Pid: 0, comm: swapper Not tainted 2.6.31-mm1 #2 [ 94.342509] Call Trace: [ 94.342509] [] register_lock_class+0x58/0x241 [ 94.342509] [] ? __lock_acquire+0xb57/0xb73 [ 94.342509] [] __lock_acquire+0xac/0xb73 [ 94.342509] [] ? lock_release_non_nested+0x17b/0x1de [ 94.342509] [] lock_acquire+0x67/0x84 [ 94.342509] [] ? skb_dequeue+0x15/0x41 [ 94.342509] [] _spin_lock_irqsave+0x2f/0x3f [ 94.342509] [] ? skb_dequeue+0x15/0x41 [ 94.342509] [] skb_dequeue+0x15/0x41 [ 94.342509] [] ? _read_unlock+0x1d/0x20 [ 94.342509] [] skb_queue_purge+0x14/0x1b [ 94.342509] [] l2cap_recv_frame+0xea1/0x115a [l2cap] [ 94.342509] [] ? __lock_acquire+0xb57/0xb73 [ 94.342509] [] ? mark_lock+0x1e/0x1c7 [ 94.342509] [] ? hci_rx_task+0xd2/0x1bc [bluetooth] [ 94.342509] [] l2cap_recv_acldata+0xb1/0x1c6 [l2cap] [ 94.342509] [] hci_rx_task+0x106/0x1bc [bluetooth] [ 94.342509] [] ? l2cap_recv_acldata+0x0/0x1c6 [l2cap] [ 94.342509] [] tasklet_action+0x69/0xc1 [ 94.342509] [] __do_softirq+0x94/0x11e [ 94.342509] [] do_softirq+0x36/0x5a [ 94.342509] [] irq_exit+0x35/0x68 [ 94.342509] [] do_IRQ+0x72/0x89 [ 94.342509] [] common_interrupt+0x2e/0x34 [ 94.342509] [] ? pm_qos_add_requirement+0x63/0x9d [ 94.342509] [] ? acpi_idle_enter_bm+0x209/0x238 [ 94.342509] [] cpuidle_idle_call+0x5c/0x94 [ 94.342509] [] cpu_idle+0x4e/0x6f [ 94.342509] [] rest_init+0x53/0x55 [ 94.342509] [] start_kernel+0x2f0/0x2f5 [ 94.342509] [] i386_start_kernel+0x91/0x96 Reported-by: Oliver Hartkopp Signed-off-by: Dave Young Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/bluetooth/l2cap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 555d9da1869b..77e9fb130adb 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -555,12 +555,12 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) conn->feat_mask = 0; - setup_timer(&conn->info_timer, l2cap_info_timeout, - (unsigned long) conn); - spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); + setup_timer(&conn->info_timer, l2cap_info_timeout, + (unsigned long) conn); + conn->disc_reason = 0x13; return conn; @@ -783,6 +783,9 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) /* Default config options */ pi->conf_len = 0; pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; + skb_queue_head_init(TX_QUEUE(sk)); + skb_queue_head_init(SREJ_QUEUE(sk)); + INIT_LIST_HEAD(SREJ_LIST(sk)); } static struct proto l2cap_proto = { -- cgit v1.2.3 From 55b8050353c4a212c94d7156e2bd5885225b869b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Oct 2009 06:41:58 +0000 Subject: net: Fix IP_MULTICAST_IF ipv4/ipv6 setsockopt(IP_MULTICAST_IF) have dubious __dev_get_by_index() calls. This function should be called only with RTNL or dev_base_lock held, or reader could see a corrupt hash chain and eventually enter an endless loop. Fix is to call dev_get_by_index()/dev_put(). If this happens to be performance critical, we could define a new dev_exist_by_index() function to avoid touching dev refcount. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 7 +++---- net/ipv6/ipv6_sockglue.c | 6 +++++- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0c0b6e363a20..e982b5c1ee17 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); - if (dev) { + if (dev) mreq.imr_ifindex = dev->ifindex; - dev_put(dev); - } } else - dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); + dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; if (!dev) break; + dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 14f54eb5a7fc..4f7aaf6996a3 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -496,13 +496,17 @@ done: goto e_inval; if (val) { + struct net_device *dev; + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(net, val) == NULL) { + dev = dev_get_by_index(net, val); + if (!dev) { retv = -ENODEV; break; } + dev_put(dev); } np->mcast_oif = val; retv = 0; -- cgit v1.2.3 From b6b39e8f3fbbb31001b836afec87bcaf4811a7bf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Oct 2009 19:41:06 +0000 Subject: tcp: Try to catch MSG_PEEK bug This patch tries to print out more information when we hit the MSG_PEEK bug in tcp_recvmsg. It's been around since at least 2005 and it's about time that we finally fix it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9b2756fbdf9b..90b2e0649bfb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1442,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN_ON(!(flags & MSG_PEEK)); + if (WARN_ON(!(flags & MSG_PEEK))) + printk(KERN_INFO "recvmsg bug 2: copied %X " + "seq %X\n", *seq, TCP_SKB_CB(skb)->seq); } /* Well, if we have backlog, try to process it now yet. */ -- cgit v1.2.3 From e95646c3ec33c8ec0693992da4332a6b32eb7e31 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 30 Sep 2009 11:17:21 +0200 Subject: virtio: let header files include virtio_ids.h Rusty, commit 3ca4f5ca73057a617f9444a91022d7127041970a virtio: add virtio IDs file moved all device IDs into a single file. While the change itself is a very good one, it can break userspace applications. For example if a userspace tool wanted to get the ID of virtio_net it used to include virtio_net.h. This does no longer work, since virtio_net.h does not include virtio_ids.h. This patch moves all "#include " from the C files into the header files, making the header files compatible with the old ones. In addition, this patch exports virtio_ids.h to userspace. CC: Fernando Luis Vazquez Cao Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 1 - drivers/block/virtio_blk.c | 1 - drivers/char/hw_random/virtio-rng.c | 1 - drivers/char/virtio_console.c | 1 - drivers/net/virtio_net.c | 1 - drivers/virtio/virtio_balloon.c | 1 - include/linux/Kbuild | 1 + include/linux/virtio_9p.h | 1 + include/linux/virtio_balloon.h | 1 + include/linux/virtio_blk.h | 1 + include/linux/virtio_console.h | 1 + include/linux/virtio_net.h | 1 + include/linux/virtio_rng.h | 1 + net/9p/trans_virtio.c | 1 - 14 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index ba9373f82ab5..098de5bce00a 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -42,7 +42,6 @@ #include #include "linux/lguest_launcher.h" #include "linux/virtio_config.h" -#include #include "linux/virtio_net.h" #include "linux/virtio_blk.h" #include "linux/virtio_console.h" diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 348befaaec73..55635d1f697d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 962968f05b94..b6c24dcc987d 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /* The host will fill any buffer we give it with sweet, sweet randomness. We diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 0d328b59568d..a035ae39a359 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include "hvc_console.h" diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8d009760277c..50ac94ce9c16 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 200c22f55130..39789232646d 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -19,7 +19,6 @@ */ //#define DEBUG #include -#include #include #include #include diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f384d4b163a..1feed71551c9 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -364,6 +364,7 @@ unifdef-y += utsname.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h +unifdef-y += virtio_ids.h unifdef-y += virtio_blk.h unifdef-y += virtio_net.h unifdef-y += virtio_9p.h diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index ea7226a45acb..095e10d148b4 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* Maximum number of virtio channels per partition (1 for now) */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 09d730085060..1418f048cb34 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include /* The feature bitmap for virtio balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 15cb666581d7..1e19470d2da2 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -3,6 +3,7 @@ /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ #include +#include #include /* Feature bits */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index b5f519806014..fe885174cc1f 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,7 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include +#include #include /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so * anyone can use the definitions to implement compatible drivers/servers. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 1f41734bbb77..085e42298ce5 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,6 +3,7 @@ /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ #include +#include #include #include diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 48121c3c434b..c4d5de896f0c 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -2,6 +2,7 @@ #define _LINUX_VIRTIO_RNG_H /* This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. */ +#include #include #endif /* _LINUX_VIRTIO_RNG_H */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index b2e07f0dd298..ea1e3daabefe 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #define VIRTQUEUE_NUM 128 -- cgit v1.2.3 From c62f4c453ab4b0240ab857bfd089da2c01ad91e7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 22 Oct 2009 21:37:56 -0700 Subject: net: use WARN() for the WARN_ON in commit b6b39e8f3fbbb Commit b6b39e8f3fbbb (tcp: Try to catch MSG_PEEK bug) added a printk() to the WARN_ON() that's in tcp.c. This patch changes this combination to WARN(); the advantage of WARN() is that the printk message shows up inside the message, so that kerneloops.org will collect the message. In addition, this gets rid of an extra if() statement. Signed-off-by: Arjan van de Ven Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90b2e0649bfb..98440ad82558 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1442,9 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - if (WARN_ON(!(flags & MSG_PEEK))) - printk(KERN_INFO "recvmsg bug 2: copied %X " - "seq %X\n", *seq, TCP_SKB_CB(skb)->seq); + WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " + "copied %X seq %X\n", *seq, + TCP_SKB_CB(skb)->seq); } /* Well, if we have backlog, try to process it now yet. */ -- cgit v1.2.3 From 66ed1e5ec1d979e572554643063734a7664261bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Oct 2009 06:55:20 -0700 Subject: pktgen: Dont leak kernel memory While playing with pktgen, I realized IP ID was not filled and a random value was taken, possibly leaking 2 bytes of kernel memory. We can use an increasing ID, this can help diagnostics anyway. Also clear packet payload, instead of leaking kernel memory. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/pktgen.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 86acdba0a97d..6eb8d47cbf3a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -335,6 +335,7 @@ struct pktgen_dev { __u32 cur_src_mac_offset; __be32 cur_saddr; __be32 cur_daddr; + __u16 ip_id; __u16 cur_udp_dst; __u16 cur_udp_src; __u16 cur_queue_map; @@ -2630,6 +2631,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->protocol = IPPROTO_UDP; /* UDP */ iph->saddr = pkt_dev->cur_saddr; iph->daddr = pkt_dev->cur_daddr; + iph->id = htons(pkt_dev->ip_id); + pkt_dev->ip_id++; iph->frag_off = 0; iplen = 20 + 8 + datalen; iph->tot_len = htons(iplen); @@ -2641,24 +2644,26 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) + if (pkt_dev->nfrags <= 0) { pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { + memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); + } else { int frags = pkt_dev->nfrags; - int i; + int i, len; pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); if (frags > MAX_SKB_FRAGS) frags = MAX_SKB_FRAGS; if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); + len = datalen - frags * PAGE_SIZE; + memset(skb_put(skb, len), 0, len); datalen = frags * PAGE_SIZE; } i = 0; while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); + struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); skb_shinfo(skb)->frags[i].page = page; skb_shinfo(skb)->frags[i].page_offset = 0; skb_shinfo(skb)->frags[i].size = -- cgit v1.2.3 From d419b9f0fa69e79ccba3e5e79a58a52ae0c2ed6a Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 19 Oct 2009 14:55:37 -0700 Subject: mac80211: fix ibss joining Recent commit "mac80211: fix logic error ibss merge bssid check" fixed joining of ibss cell when static bssid is provided. In this case ifibss->bssid is set before the cell is joined and comparing that address to a bss should thus always succeed. Unfortunately this change broke the other case of joining a ibss cell without providing a static bssid where the value of ifibss->bssid is not set before the cell is joined. Since ifibss->bssid may be set before or after joining the cell we do not learn anything by comparing it to a known bss. Remove this check. Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6eaf69823439..ca8ecce31d34 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -538,13 +538,12 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) WLAN_CAPABILITY_PRIVACY, capability); + if (bss) { #ifdef CONFIG_MAC80211_IBSS_DEBUG - if (bss) printk(KERN_DEBUG " sta_find_ibss: selected %pM current " "%pM\n", bss->cbss.bssid, ifibss->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (bss && !memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN)) { printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", sdata->dev->name, bss->cbss.bssid); @@ -552,8 +551,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); return; - } else if (bss) - ieee80211_rx_bss_put(local, bss); + } #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); -- cgit v1.2.3 From 2ef6e4440926668cfa9eac4b79e63528ebcbe0c1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Oct 2009 15:08:12 +0900 Subject: mac80211: keep auth state when assoc fails When association fails, we should stay authenticated, which in mac80211 is represented by the existence of the mlme work struct, so we cannot free that, instead we need to just set it to idle. (Brought to you by the hacking session at Kernel Summit 2009 in Tokyo, Japan. -- JWL) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d26e9bf8964..dc5049d58c51 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1457,8 +1457,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", sdata->dev->name, status_code); - list_del(&wk->list); - kfree(wk); + wk->state = IEEE80211_MGD_STATE_IDLE; return RX_MGMT_CFG80211_ASSOC; } -- cgit v1.2.3 From 7d930bc33653d5592dc386a76a38f39c2e962344 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Oct 2009 15:08:53 +0900 Subject: cfg80211: sme: deauthenticate on assoc failure When the in-kernel SME gets an association failure from the AP we don't deauthenticate, and thus get into a very confused state which will lead to warnings later on. Fix this by actually deauthenticating when the AP indicates an association failure. (Brought to you by the hacking session at Kernel Summit 2009 in Tokyo, Japan. -- JWL) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.h | 1 + net/wireless/mlme.c | 9 +++++++++ net/wireless/sme.c | 21 +++++++++++++++++++-- 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 2a33d8bc886b..68b321997d4c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -358,6 +358,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_conn_work(struct work_struct *work); +void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); /* internal helpers */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 79d2eec54cec..0a6b7a0eca6b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -62,6 +62,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) u8 *ie = mgmt->u.assoc_resp.variable; int i, ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); struct cfg80211_internal_bss *bss = NULL; + bool need_connect_result = true; wdev_lock(wdev); @@ -94,6 +95,14 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } WARN_ON(!bss); + } else if (wdev->conn) { + cfg80211_sme_failed_assoc(wdev); + need_connect_result = false; + /* + * do not call connect_result() now because the + * sme will schedule work that does it later. + */ + goto out; } if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 93c3ed329204..ece378d531ef 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -26,6 +26,7 @@ struct cfg80211_conn { CFG80211_CONN_AUTHENTICATING, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, + CFG80211_CONN_DEAUTH_ASSOC_FAIL, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 *ie; @@ -148,6 +149,12 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) NULL, 0, WLAN_REASON_DEAUTH_LEAVING); return err; + case CFG80211_CONN_DEAUTH_ASSOC_FAIL: + __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING); + /* return an error so that we call __cfg80211_connect_result() */ + return -EINVAL; default: return 0; } @@ -158,6 +165,7 @@ void cfg80211_conn_work(struct work_struct *work) struct cfg80211_registered_device *rdev = container_of(work, struct cfg80211_registered_device, conn_work); struct wireless_dev *wdev; + u8 bssid[ETH_ALEN]; rtnl_lock(); cfg80211_lock_rdev(rdev); @@ -173,10 +181,10 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } + memcpy(bssid, wdev->conn->params.bssid, ETH_ALEN); if (cfg80211_conn_do_work(wdev)) __cfg80211_connect_result( - wdev->netdev, - wdev->conn->params.bssid, + wdev->netdev, bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); @@ -337,6 +345,15 @@ bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) return true; } +void cfg80211_sme_failed_assoc(struct wireless_dev *wdev) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + wdev->conn->state = CFG80211_CONN_DEAUTH_ASSOC_FAIL; + schedule_work(&rdev->conn_work); +} + void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, -- cgit v1.2.3 From f99288d1761fb3b0decb0fdc4d746406addd29d5 Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Tue, 20 Oct 2009 12:17:34 -0700 Subject: mac80211: trivial: fix spelling in mesh_hwmp Fix a typo in the description of hwmp_route_info_get(), no function changes. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/mac80211/mesh_hwmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index e12a786e26b8..29b82e98effa 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -259,7 +259,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * @hwmp_ie: hwmp information element (PREP or PREQ) * * This function updates the path routing information to the originator and the - * transmitter of a HWMP PREQ or PREP fram. + * transmitter of a HWMP PREQ or PREP frame. * * Returns: metric to frame originator or 0 if the frame should not be further * processed -- cgit v1.2.3 From 9b1ce526eb917c8b5c8497c327768130ee683392 Mon Sep 17 00:00:00 2001 From: Björn Smedman Date: Sat, 24 Oct 2009 20:55:09 +0200 Subject: mac80211: fix for incorrect sequence number on hostapd injected frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When hostapd injects a frame, e.g. an authentication or association response, mac80211 looks for a suitable access point virtual interface to associate the frame with based on its source address. This makes it possible e.g. to correctly assign sequence numbers to the frames. A small typo in the ethernet address comparison statement caused a failure to find a suitable ap interface. Sequence numbers on such frames where therefore left unassigned causing some clients (especially windows-based 11b/g clients) to reject them and fail to authenticate or associate with the access point. This patch fixes the typo in the address comparison statement. Signed-off-by: Björn Smedman Reviewed-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index db4bda681ec9..eaa4118de988 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1445,7 +1445,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, if (tmp_sdata->vif.type != NL80211_IFTYPE_AP) continue; if (compare_ether_addr(tmp_sdata->dev->dev_addr, - hdr->addr2)) { + hdr->addr2) == 0) { dev_hold(tmp_sdata->dev); dev_put(sdata->dev); sdata = tmp_sdata; -- cgit v1.2.3 From 55888dfb6ba7e318bb3d6a44d25009906206bf6a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 28 Oct 2009 08:59:47 +0000 Subject: AF_RAW: Augment raw_send_hdrinc to expand skb to fit iphdr->ihl (v2) Augment raw_send_hdrinc to correct for incorrect ip header length values A series of oopses was reported to me recently. Apparently when using AF_RAW sockets to send data to peers that were reachable via ipsec encapsulation, people could panic or BUG halt their systems. I've tracked the problem down to user space sending an invalid ip header over an AF_RAW socket with IP_HDRINCL set to 1. Basically what happens is that userspace sends down an ip frame that includes only the header (no data), but sets the ip header ihl value to a large number, one that is larger than the total amount of data passed to the sendmsg call. In raw_send_hdrincl, we allocate an skb based on the size of the data in the msghdr that was passed in, but assume the data is all valid. Later during ipsec encapsulation, xfrm4_tranport_output moves the entire frame back in the skbuff to provide headroom for the ipsec headers. During this operation, the skb->transport_header is repointed to a spot computed by skb->network_header + the ip header length (ihl). Since so little data was passed in relative to the value of ihl provided by the raw socket, we point transport header to an unknown location, resulting in various crashes. This fix for this is pretty straightforward, simply validate the value of of iph->ihl when sending over a raw socket. If (iph->ihl*4U) > user data buffer size, drop the frame and return -EINVAL. I just confirmed this fixes the reported crashes. Signed-off-by: Neil Horman Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 757c9171e7c2..ab996f9c0fe0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -352,13 +352,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); - if (err) - goto error_fault; + err = -EFAULT; + if (memcpy_fromiovecend((void *)iph, from, 0, length)) + goto error_free; - /* We don't modify invalid header */ iphlen = iph->ihl * 4; - if (iphlen >= sizeof(*iph) && iphlen <= length) { + + /* + * We don't want to modify the ip header, but we do need to + * be sure that it won't cause problems later along the network + * stack. Specifically we want to make sure that iph->ihl is a + * sane value. If ihl points beyond the length of the buffer passed + * in, reject the frame as invalid + */ + err = -EINVAL; + if (iphlen > length) + goto error_free; + + if (iphlen >= sizeof(*iph)) { if (!iph->saddr) iph->saddr = rt->rt_src; iph->check = 0; @@ -381,8 +392,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, out: return 0; -error_fault: - err = -EFAULT; +error_free: kfree_skb(skb); error: IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); -- cgit v1.2.3 From b5dd884e682cae6b8c037f9d11f3b623b4cf2011 Mon Sep 17 00:00:00 2001 From: Gabor Gombas Date: Thu, 29 Oct 2009 03:19:11 -0700 Subject: net: Fix 'Re: PACKET_TX_RING: packet size is too long' Currently PACKET_TX_RING forces certain amount of every frame to remain unused. This probably originates from an early version of the PACKET_TX_RING patch that in fact used the extra space when the (since removed) CONFIG_PACKET_MMAP_ZERO_COPY option was enabled. The current code does not make any use of this extra space. This patch removes the extra space reservation and lets userspace make use of the full frame size. Signed-off-by: Gabor Gombas Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1238949e66a9..48b18dad6763 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -982,10 +982,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_put; size_max = po->tx_ring.frame_size - - sizeof(struct skb_shared_info) - - po->tp_hdrlen - - LL_ALLOCATED_SPACE(dev) - - sizeof(struct sockaddr_ll); + - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); if (size_max > dev->mtu + reserve) size_max = dev->mtu + reserve; -- cgit v1.2.3 From b0c110ca8e89f2c9cd52ec7fb1b98c5b7aa78496 Mon Sep 17 00:00:00 2001 From: jamal Date: Sun, 18 Oct 2009 02:12:33 +0000 Subject: net: Fix RPF to work with policy routing Policy routing is not looked up by mark on reverse path filtering. This fixes it. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_frontend.c | 5 ++++- net/ipv4/route.c | 8 ++++---- 3 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef91fe924ba4..4d22fabc7719 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -210,7 +210,8 @@ extern struct fib_table *fib_get_table(struct net *net, u32 id); extern const struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, - struct net_device *dev, __be32 *spec_dst, u32 *itag); + struct net_device *dev, __be32 *spec_dst, + u32 *itag, u32 mark); extern void fib_select_default(struct net *net, const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e2f950592566..aa00398be80e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -229,14 +229,17 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, */ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, - struct net_device *dev, __be32 *spec_dst, u32 *itag) + struct net_device *dev, __be32 *spec_dst, + u32 *itag, u32 mark) { struct in_device *in_dev; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = src, .saddr = dst, .tos = tos } }, + .mark = mark, .iif = oif }; + struct fib_result res; int no_addr, rpf; int ret; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bb4199252026..5b1050a5d874 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1854,7 +1854,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else if (fib_validate_source(saddr, 0, tos, 0, - dev, &spec_dst, &itag) < 0) + dev, &spec_dst, &itag, 0) < 0) goto e_inval; rth = dst_alloc(&ipv4_dst_ops); @@ -1967,7 +1967,7 @@ static int __mkroute_input(struct sk_buff *skb, err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag); + in_dev->dev, &spec_dst, &itag, skb->mark); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2141,7 +2141,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, int result; result = fib_validate_source(saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag); + dev, &spec_dst, &itag, skb->mark); if (result < 0) goto martian_source; if (result) @@ -2170,7 +2170,7 @@ brd_input: spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); else { err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag); + &itag, skb->mark); if (err < 0) goto martian_source; if (err) -- cgit v1.2.3 From 9d410c796067686b1e032d54ce475b7055537138 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 30 Oct 2009 05:03:53 +0000 Subject: net: fix sk_forward_alloc corruption On UDP sockets, we must call skb_free_datagram() with socket locked, or risk sk_forward_alloc corruption. This requirement is not respected in SUNRPC. Add a convenient helper, skb_free_datagram_locked() and use it in SUNRPC Reported-by: Francis Moreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 10 +++++++++- net/ipv4/udp.c | 4 +--- net/ipv6/udp.c | 4 +--- net/sunrpc/svcsock.c | 10 +++++----- 5 files changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6aebfceca3eb..bcdd6606f468 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1757,6 +1757,8 @@ extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, int to_offset, int size); extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +extern void skb_free_datagram_locked(struct sock *sk, + struct sk_buff *skb); extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); extern __wsum skb_checksum(const struct sk_buff *skb, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index 1c6cf3a1a4f6..4ade3011bb3c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -224,6 +224,15 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) consume_skb(skb); sk_mem_reclaim_partial(sk); } +EXPORT_SYMBOL(skb_free_datagram); + +void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) +{ + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +} +EXPORT_SYMBOL(skb_free_datagram_locked); /** * skb_kill_datagram - Free a datagram skbuff forcibly @@ -752,5 +761,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, EXPORT_SYMBOL(datagram_poll); EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); EXPORT_SYMBOL(skb_copy_datagram_iovec); -EXPORT_SYMBOL(skb_free_datagram); EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d0d436d6216c..0fa9f70e4b19 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,9 +999,7 @@ try_again: err = ulen; out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + skb_free_datagram_locked(sk, skb); out: return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3a60f12b34ed..cf538ed5ef6a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -288,9 +288,7 @@ try_again: err = ulen; out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + skb_free_datagram_locked(sk, skb); out: return err; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ccc5e83cae5d..1c246a4f491e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -111,7 +111,7 @@ static void svc_release_skb(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); } } @@ -578,7 +578,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) "svc: received unknown control message %d/%d; " "dropping RPC reply datagram\n", cmh->cmsg_level, cmh->cmsg_type); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } @@ -588,18 +588,18 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { local_bh_enable(); /* checksum error */ - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } local_bh_enable(); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); rqstp->rq_arg.head[0].iov_len = len; if (skb_checksum_complete(skb)) { - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram_locked(svsk->sk_sk, skb); return 0; } rqstp->rq_xprt_ctxt = skb; -- cgit v1.2.3 From 2e9526b352061ee0fd2a1580a2e3a5af960dabc4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 30 Oct 2009 05:51:48 +0000 Subject: gre: Fix dev_addr clobbering for gretap Nathan Neulinger noticed that gretap devices get their MAC address from the local IP address, which results in invalid MAC addresses half of the time. This is because gretap is still using the tunnel netdev ops rather than the correct tap netdev ops struct. This patch also fixes changelink to not clobber the MAC address for the gretap case. Signed-off-by: Herbert Xu Acked-by: Stephen Hemminger Tested-by: Nathan Neulinger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 41ada9904d31..143333852624 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1464,7 +1464,7 @@ static void ipgre_tap_setup(struct net_device *dev) ether_setup(dev); - dev->netdev_ops = &ipgre_netdev_ops; + dev->netdev_ops = &ipgre_tap_netdev_ops; dev->destructor = free_netdev; dev->iflink = 0; @@ -1525,25 +1525,29 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], if (t->dev != dev) return -EEXIST; } else { - unsigned nflags = 0; - t = nt; - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; + if (dev->type != ARPHRD_ETHER) { + unsigned nflags = 0; - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; + if (ipv4_is_multicast(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } ipgre_tunnel_unlink(ign, t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; t->parms.i_key = p.i_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + } ipgre_tunnel_link(ign, t); netdev_state_change(dev); } -- cgit v1.2.3 From f446d10f214091408b7300f15c9adf60569edf28 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Oct 2009 15:12:32 +0100 Subject: mac80211: fix BSS leak The IBSS code leaks a BSS struct after telling cfg80211 about a given BSS by passing a frame. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index ca8ecce31d34..f1362f32c17d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -73,6 +73,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; + struct cfg80211_bss *bss; u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; @@ -177,8 +178,9 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel, - mgmt, skb->len, 0, GFP_KERNEL); + bss = cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel, + mgmt, skb->len, 0, GFP_KERNEL); + cfg80211_put_bss(bss); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); } -- cgit v1.2.3 From 2171abc58644e09dbba546d91366b12743115396 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Oct 2009 08:34:00 +0100 Subject: mac80211: fix addba timer The addba timer function acquires the sta spinlock, but at the same time we try to del_timer_sync() it under the spinlock which can produce deadlocks. To fix this, always del_timer_sync() the timer in ieee80211_process_addba_resp() and add it again after checking the conditions, if necessary. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index bd765f30dba2..b09948ceec4a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -666,26 +666,25 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, state = &sta->ampdu_mlme.tid_state_tx[tid]; + del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->lock); - return; - } + if (!(*state & HT_ADDBA_REQUESTED_MSK)) + goto timer_still_needed; if (mgmt->u.action.u.addba_resp.dialog_token != sta->ampdu_mlme.tid_tx[tid]->dialog_token) { - spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - return; + goto timer_still_needed; } - del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { u8 curstate = *state; @@ -699,5 +698,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } else { ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } + + goto out; + + timer_still_needed: + add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + out: spin_unlock_bh(&sta->lock); } -- cgit v1.2.3 From 372362ade2fe5c33d749e017f1c5bc8140769a3e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Oct 2009 10:09:28 +0100 Subject: mac80211: fix reason code output endianness When HT debugging is enabled and we receive a DelBA frame we print out the reason code in the wrong byte order. Fix that so we don't get weird values printed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 0891bfb06996..48ef1a282b91 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -153,7 +153,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, if (net_ratelimit()) printk(KERN_DEBUG "delba from %pM (%s) tid %d reason code %d\n", mgmt->sa, initiator ? "initiator" : "recipient", tid, - mgmt->u.action.u.delba.reason_code); + le16_to_cpu(mgmt->u.action.u.delba.reason_code)); #endif /* CONFIG_MAC80211_HT_DEBUG */ if (initiator == WLAN_BACK_INITIATOR) -- cgit v1.2.3 From 3e2796a90cf349527e50b3bc4d0b2f4019b1ce7a Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 2 Nov 2009 08:39:28 -0600 Subject: 9p: fix readdir corner cases The patch below also addresses a couple of other corner cases in readdir seen with a large (e.g. 64k) msize. I'm not sure what people think of my co-opting of fid->aux here. I'd be happy to rework if there's a better way. When the size of the user supplied buffer passed to readdir is smaller than the data returned in one go by the 9P read request, v9fs_dir_readdir() currently discards extra data so that, on the next call, a 9P read request will be issued with offset < previous offset + bytes returned, which voilates the constraint described in paragraph 3 of read(5) description. This patch preseves the leftover data in fid->aux for use in the next call. Signed-off-by: Jim Garlick Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_dir.c | 94 +++++++++++++++++++++++++++++++++++-------------- include/net/9p/client.h | 7 ++-- net/9p/client.c | 5 ++- 3 files changed, 72 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index cae53d405f21..15cce53bf61e 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -39,6 +39,24 @@ #include "v9fs_vfs.h" #include "fid.h" +/** + * struct p9_rdir - readdir accounting + * @mutex: mutex protecting readdir + * @head: start offset of current dirread buffer + * @tail: end offset of current dirread buffer + * @buf: dirread buffer + * + * private structure for keeping track of readdir + * allocated on demand + */ + +struct p9_rdir { + struct mutex mutex; + int head; + int tail; + uint8_t *buf; +}; + /** * dt_type - return file type * @mistat: mistat structure @@ -70,57 +88,79 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) { int over; struct p9_wstat st; - int err; + int err = 0; struct p9_fid *fid; int buflen; - char *statbuf; - int n, i = 0; + int reclen = 0; + struct p9_rdir *rdir; P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); fid = filp->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; - statbuf = kmalloc(buflen, GFP_KERNEL); - if (!statbuf) - return -ENOMEM; - - while (1) { - err = v9fs_file_readn(filp, statbuf, NULL, buflen, - fid->rdir_fpos); - if (err <= 0) - break; - - i = 0; - n = err; - while (i < n) { - err = p9stat_read(statbuf + i, buflen-i, &st, - fid->clnt->dotu); + + /* allocate rdir on demand */ + if (!fid->rdir) { + rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); + + if (rdir == NULL) { + err = -ENOMEM; + goto exit; + } + spin_lock(&filp->f_dentry->d_lock); + if (!fid->rdir) { + rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir); + mutex_init(&rdir->mutex); + rdir->head = rdir->tail = 0; + fid->rdir = (void *) rdir; + rdir = NULL; + } + spin_unlock(&filp->f_dentry->d_lock); + kfree(rdir); + } + rdir = (struct p9_rdir *) fid->rdir; + + err = mutex_lock_interruptible(&rdir->mutex); + while (err == 0) { + if (rdir->tail == rdir->head) { + err = v9fs_file_readn(filp, rdir->buf, NULL, + buflen, filp->f_pos); + if (err <= 0) + goto unlock_and_exit; + + rdir->head = 0; + rdir->tail = err; + } + + while (rdir->head < rdir->tail) { + err = p9stat_read(rdir->buf + rdir->head, + buflen - rdir->head, &st, + fid->clnt->dotu); if (err) { P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); err = -EIO; p9stat_free(&st); - goto free_and_exit; + goto unlock_and_exit; } - - i += st.size+2; - fid->rdir_fpos += st.size+2; + reclen = st.size+2; over = filldir(dirent, st.name, strlen(st.name), filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st)); - filp->f_pos += st.size+2; - p9stat_free(&st); if (over) { err = 0; - goto free_and_exit; + goto unlock_and_exit; } + rdir->head += reclen; + filp->f_pos += reclen; } } -free_and_exit: - kfree(statbuf); +unlock_and_exit: + mutex_unlock(&rdir->mutex); +exit: return err; } diff --git a/include/net/9p/client.h b/include/net/9p/client.h index e26812274b75..fb00b329f0d3 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -159,8 +159,7 @@ struct p9_client { * @qid: the &p9_qid server identifier this handle points to * @iounit: the server reported maximum transaction size for this file * @uid: the numeric uid of the local user who owns this handle - * @aux: transport specific information (unused?) - * @rdir_fpos: tracks offset of file position when reading directory contents + * @rdir: readdir accounting structure (allocated on demand) * @flist: per-client-instance fid tracking * @dlist: per-dentry fid tracking * @@ -174,9 +173,9 @@ struct p9_fid { struct p9_qid qid; u32 iounit; uid_t uid; - void *aux; - int rdir_fpos; + void *rdir; + struct list_head flist; struct list_head dlist; /* list of all fids attached to a dentry */ }; diff --git a/net/9p/client.c b/net/9p/client.c index 5bf5f227dbe0..8af95b2dddd6 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -582,11 +582,9 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) memset(&fid->qid, 0, sizeof(struct p9_qid)); fid->mode = -1; - fid->rdir_fpos = 0; fid->uid = current_fsuid(); fid->clnt = clnt; - fid->aux = NULL; - + fid->rdir = NULL; spin_lock_irqsave(&clnt->lock, flags); list_add(&fid->flist, &clnt->fidlist); spin_unlock_irqrestore(&clnt->lock, flags); @@ -609,6 +607,7 @@ static void p9_fid_destroy(struct p9_fid *fid) spin_lock_irqsave(&clnt->lock, flags); list_del(&fid->flist); spin_unlock_irqrestore(&clnt->lock, flags); + kfree(fid->rdir); kfree(fid); } -- cgit v1.2.3 From 7400f42e9d765fa0656b432f3ab1245f9710f190 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 31 Oct 2009 07:40:37 +0100 Subject: cfg80211: fix NULL ptr deref commit 211a4d12abf86fe0df4cd68fc6327cbb58f56f81 Author: Johannes Berg Date: Tue Oct 20 15:08:53 2009 +0900 cfg80211: sme: deauthenticate on assoc failure introduced a potential NULL pointer dereference that some people have been hitting for some reason -- the params.bssid pointer is not guaranteed to be non-NULL for what seems to be a race between various ways of reaching the same thing. While I'm trying to analyse the problem more let's first fix the crash. I think the real fix may be to avoid doing _anything_ if it ended up being NULL, but right now I'm not sure yet. I think http://bugzilla.kernel.org/show_bug.cgi?id=14342 might also be this issue. Reported-by: Parag Warudkar Tested-by: Parag Warudkar Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/sme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ece378d531ef..9f0b2800a9d7 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -165,7 +165,7 @@ void cfg80211_conn_work(struct work_struct *work) struct cfg80211_registered_device *rdev = container_of(work, struct cfg80211_registered_device, conn_work); struct wireless_dev *wdev; - u8 bssid[ETH_ALEN]; + u8 bssid_buf[ETH_ALEN], *bssid = NULL; rtnl_lock(); cfg80211_lock_rdev(rdev); @@ -181,7 +181,10 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - memcpy(bssid, wdev->conn->params.bssid, ETH_ALEN); + if (wdev->conn->params.bssid) { + memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); + bssid = bssid_buf; + } if (cfg80211_conn_do_work(wdev)) __cfg80211_connect_result( wdev->netdev, bssid, -- cgit v1.2.3 From c1f9a764cf47686b1f5a0cf87ada68d90056136a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 1 Nov 2009 19:25:40 +0100 Subject: mac80211: check interface is down before type change For some strange reason the netif_running() check ended up after the actual type change instead of before, potentially causing all kinds of problems if the interface is up while changing the type; one of the problems manifests itself as a warning: WARNING: at net/mac80211/iface.c:651 ieee80211_teardown_sdata+0xda/0x1a0 [mac80211]() Hardware name: Aspire one Pid: 2596, comm: wpa_supplicant Tainted: G W 2.6.31-10-generic #32-Ubuntu Call Trace: [] warn_slowpath_common+0x6d/0xa0 [] warn_slowpath_null+0x15/0x20 [] ieee80211_teardown_sdata+0xda/0x1a0 [mac80211] [] ieee80211_if_change_type+0x4a/0xc0 [mac80211] [] ieee80211_change_iface+0x61/0xa0 [mac80211] [] cfg80211_wext_siwmode+0xc7/0x120 [cfg80211] [] ioctl_standard_call+0x58/0xf0 (http://www.kerneloops.org/searchweek.php?search=ieee80211_teardown_sdata) Cc: Arjan van de Ven Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5608f6c68413..7b5131bd6fa1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -72,6 +72,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; int ret; + if (netif_running(dev)) + return -EBUSY; + if (!nl80211_type_check(type)) return -EINVAL; @@ -81,9 +84,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (ret) return ret; - if (netif_running(sdata->dev)) - return -EBUSY; - if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) ieee80211_sdata_set_mesh_id(sdata, params->mesh_id_len, -- cgit v1.2.3 From 1056bd51674e529813213186471bb4ac6689a755 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Nov 2009 20:46:52 -0800 Subject: bridge: prevent bridging wrong device The bridge code assumes ethernet addressing, so be more strict in the what is allowed. This showed up when GRE had a bug and was not using correct address format. Add some more comments for increased clarity. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b1b3b0fbf41c..4a9f52732655 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -377,12 +377,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) struct net_bridge_port *p; int err = 0; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) + /* Don't allow bridging non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) return -EINVAL; + /* No bridging of bridges */ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; + /* Device is already being bridged */ if (dev->br_port != NULL) return -EBUSY; -- cgit v1.2.3 From b4ec824021493ba6cb7eeb61572f4d2f8a80a52e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2009 20:56:07 -0800 Subject: rose: device refcount leak While hunting dev_put() for net-next-2.6, I found a device refcount leak in ROSE, ioctl(SIOCADDRT) error path. Fix is to not touch device refcount, as we hold RTNL Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/rose/rose_route.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 9478d9b3d977..f3e21989b88c 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -578,18 +578,18 @@ static int rose_clear_routes(void) /* * Check that the device given is a valid AX.25 interface that is "up". + * called whith RTNL */ -static struct net_device *rose_ax25_dev_get(char *devname) +static struct net_device *rose_ax25_dev_find(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(&init_net, devname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) return dev; - dev_put(dev); return NULL; } @@ -720,27 +720,23 @@ int rose_rt_ioctl(unsigned int cmd, void __user *arg) case SIOCADDRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; - if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL) + if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; - if (rose_dev_exists(&rose_route.address)) { /* Can't add routes to ourself */ - dev_put(dev); + if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */ return -EINVAL; - } if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); - dev_put(dev); return err; case SIOCDELRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; - if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL) + if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; err = rose_del_node(&rose_route, dev); - dev_put(dev); return err; case SIOCRSCLRRT: -- cgit v1.2.3 From f9dd09c7f7199685601d75882447a6598be8a3e0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 6 Nov 2009 00:43:42 -0800 Subject: netfilter: nf_nat: fix NAT issue in 2.6.30.4+ Vitezslav Samel discovered that since 2.6.30.4+ active FTP can not work over NAT. The "cause" of the problem was a fix of unacknowledged data detection with NAT (commit a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272). However, actually, that fix uncovered a long standing bug in TCP conntrack: when NAT was enabled, we simply updated the max of the right edge of the segments we have seen (td_end), by the offset NAT produced with changing IP/port in the data. However, we did not update the other parameter (td_maxend) which is affected by the NAT offset. Thus that could drift away from the correct value and thus resulted breaking active FTP. The patch below fixes the issue by *not* updating the conntrack parameters from NAT, but instead taking into account the NAT offsets in conntrack in a consistent way. (Updating from NAT would be more harder and expensive because it'd need to re-calculate parameters we already calculated in conntrack.) Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 8 ++--- include/net/netfilter/nf_nat_helper.h | 4 +++ net/ipv4/netfilter/nf_nat_core.c | 3 ++ net/ipv4/netfilter/nf_nat_helper.c | 34 ++++++++++++------ net/netfilter/nf_conntrack_core.c | 8 +++++ net/netfilter/nf_conntrack_proto_tcp.c | 64 ++++++++++++++-------------------- 6 files changed, 67 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cbdd6284996d..5cf7270e3ffc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -255,11 +255,9 @@ static inline bool nf_ct_kill(struct nf_conn *ct) } /* These are for NAT. Icky. */ -/* Update TCP window tracking data when NAT mangles the packet */ -extern void nf_conntrack_tcp_update(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conn *ct, int dir, - s16 offset); +extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 237a961f40e1..4222220920a5 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -32,4 +32,8 @@ extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, * to port ct->master->saved_proto. */ extern void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *this); + +extern s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); #endif diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 68afc6ecd343..fe1a64479dd0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -750,6 +750,8 @@ static int __init nf_nat_init(void) BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); + BUG_ON(nf_ct_nat_offset != NULL); + rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void) nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); + rcu_assign_pointer(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 09172a65d9b6..f9520fa3aba9 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -73,6 +73,28 @@ adjust_tcp_sequence(u32 seq, DUMP_OFFSET(this_way); } +/* Get the offset value, for conntrack */ +s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way; + s16 offset; + + if (!nat) + return 0; + + this_way = &nat->seq[dir]; + spin_lock_bh(&nf_nat_seqofs_lock); + offset = after(seq, this_way->correction_pos) + ? this_way->offset_after : this_way->offset_before; + spin_unlock_bh(&nf_nat_seqofs_lock); + + return offset; +} +EXPORT_SYMBOL_GPL(nf_nat_get_offset); + /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, unsigned int dataoff, @@ -189,11 +211,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, adjust_tcp_sequence(ntohl(tcph->seq), (int)rep_len - (int)match_len, ct, ctinfo); - /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo), - (int)rep_len - (int)match_len); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; @@ -415,12 +432,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) - return 0; - - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); - - return 1; + return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); } /* Setup NAT on this expected conntrack so it follows master. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c9ec3dee96e..0cdfb388a191 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1350,6 +1350,11 @@ err_stat: return ret; } +s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); +EXPORT_SYMBOL_GPL(nf_ct_nat_offset); + int nf_conntrack_init(struct net *net) { int ret; @@ -1367,6 +1372,9 @@ int nf_conntrack_init(struct net *net) /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + rcu_assign_pointer(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a82ba75376..ba2b76937283 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -492,6 +492,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } +#ifdef CONFIG_NF_NAT_NEEDED +static inline s16 nat_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); + + return get_offset != NULL ? get_offset(ct, dir, seq) : 0; +} +#define NAT_OFFSET(pf, ct, dir, seq) \ + (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) +#else +#define NAT_OFFSET(pf, ct, dir, seq) 0 +#endif + static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, @@ -506,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; + s16 receiver_offset; bool res; /* @@ -519,11 +535,16 @@ static bool tcp_in_window(const struct nf_conn *ct, if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); + /* Take into account NAT sequence number mangling */ + receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + ack -= receiver_offset; + sack -= receiver_offset; + pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -613,8 +634,8 @@ static bool tcp_in_window(const struct nf_conn *ct, pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -700,7 +721,7 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? - after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" @@ -715,39 +736,6 @@ static bool tcp_in_window(const struct nf_conn *ct, return res; } -#ifdef CONFIG_NF_NAT_NEEDED -/* Update sender->td_end after NAT successfully mangled the packet */ -/* Caller must linearize skb at tcp header. */ -void nf_conntrack_tcp_update(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conn *ct, int dir, - s16 offset) -{ - const struct tcphdr *tcph = (const void *)skb->data + dataoff; - const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; - const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; - __u32 end; - - end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - - spin_lock_bh(&ct->lock); - /* - * We have to worry for the ack in the reply packet only... - */ - if (ct->proto.tcp.seen[dir].td_end + offset == end) - ct->proto.tcp.seen[dir].td_end = end; - ct->proto.tcp.last_end = end; - spin_unlock_bh(&ct->lock); - pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); -#endif - #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 -- cgit v1.2.3 From 887e671f324d9898aaedb29a6ece6c853c394067 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Nov 2009 00:50:39 -0800 Subject: decnet: netdevice refcount leak While working on device refcount stuff, I found a device refcount leak through DECNET. This nasty bug can be used to hold refcounts on any !DECNET netdevice. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/decnet/sysctl_net_decnet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 26b0ab1e9f56..2036568beea9 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -263,11 +263,10 @@ static int dn_def_dev_strategy(ctl_table *table, return -ENODEV; rv = -ENODEV; - if (dev->dn_ptr != NULL) { + if (dev->dn_ptr != NULL) rv = dn_dev_set_default(dev, 1); - if (rv) - dev_put(dev); - } + if (rv) + dev_put(dev); } return rv; -- cgit v1.2.3 From 539054a8fa5141c9a4e9ac6a86d249e3f2bdef45 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 6 Nov 2009 18:08:32 -0800 Subject: netfilter: xt_connlimit: fix regression caused by zero family value Commit v2.6.28-rc1~717^2~109^2~2 was slightly incomplete; not all instances of par->match->family were changed to par->family. References: http://bugzilla.netfilter.org/show_bug.cgi?id=610 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_connlimit.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 680980954395..38f03f75a636 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -103,7 +103,7 @@ static int count_them(struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - const struct xt_match *match) + u_int8_t family) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; @@ -113,8 +113,7 @@ static int count_them(struct xt_connlimit_data *data, bool addit = true; int matches = 0; - - if (match->family == NFPROTO_IPV6) + if (family == NFPROTO_IPV6) hash = &data->iphash[connlimit_iphash6(addr, mask)]; else hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; @@ -157,8 +156,7 @@ static int count_them(struct xt_connlimit_data *data, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, - match->family)) + if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) /* same source network -> be counted! */ ++matches; nf_ct_put(found_ct); @@ -207,7 +205,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) spin_lock_bh(&info->data->lock); connections = count_them(info->data, tuple_ptr, &addr, - &info->mask, par->match); + &info->mask, par->family); spin_unlock_bh(&info->data->lock); if (connections < 0) { -- cgit v1.2.3 From 23ca0c989e46924393f1d54bec84801d035dd28e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 6 Nov 2009 10:37:41 +0000 Subject: ipip: Fix handling of DF packets when pmtudisc is OFF RFC 2003 requires the outer header to have DF set if DF is set on the inner header, even when PMTU discovery is off for the tunnel. Our implementation does exactly that. For this to work properly the IPIP gateway also needs to engate in PMTU when the inner DF bit is set. As otherwise the original host would not be able to carry out its PMTU successfully since part of the path is only visible to the gateway. Unfortunately when the tunnel PMTU discovery setting is off, we do not collect the necessary soft state, resulting in blackholes when the original host tries to perform PMTU discovery. This problem is not reproducible on the IPIP gateway itself as the inner packet usually has skb->local_df set. This is not correctly cleared (an unrelated bug) when the packet passes through the tunnel, which allows fragmentation to occur. For hosts behind the IPIP gateway it is readily visible with a simple ping. This patch fixes the problem by performing PMTU discovery for all packets with the inner DF bit set, regardless of the PMTU discovery setting on the tunnel itself. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 08ccd344de7a..ae40ed1ba560 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -438,25 +438,27 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (tiph->frag_off) + df |= old_iph->frag_off & htons(IP_DF); + + if (df) { mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - if (mtu < 68) { - stats->collisions++; - ip_rt_put(rt); - goto tx_error; - } - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + if (mtu < 68) { + stats->collisions++; + ip_rt_put(rt); + goto tx_error; + } - df |= (old_iph->frag_off&htons(IP_DF)); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); - if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; + if ((old_iph->frag_off & htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } } if (tunnel->err_count > 0) { -- cgit v1.2.3 From 6755aebaaf9fc5416acfd4578ab7a1e122ecbc74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Nov 2009 00:23:01 +0000 Subject: can: should not use __dev_get_by_index() without locks bcm_proc_getifname() is called with RTNL and dev_base_lock not held. It calls __dev_get_by_index() without locks, and this is illegal (might crash) Close the race by holding dev_base_lock and copying dev->name in the protected section. Signed-off-by: Eric Dumazet Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/bcm.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 597da4f8f888..e8d58f33fe09 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -132,23 +132,27 @@ static inline struct bcm_sock *bcm_sk(const struct sock *sk) /* * procfs functions */ -static char *bcm_proc_getifname(int ifindex) +static char *bcm_proc_getifname(char *result, int ifindex) { struct net_device *dev; if (!ifindex) return "any"; - /* no usage counting */ + read_lock(&dev_base_lock); dev = __dev_get_by_index(&init_net, ifindex); if (dev) - return dev->name; + strcpy(result, dev->name); + else + strcpy(result, "???"); + read_unlock(&dev_base_lock); - return "???"; + return result; } static int bcm_proc_show(struct seq_file *m, void *v) { + char ifname[IFNAMSIZ]; struct sock *sk = (struct sock *)m->private; struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; @@ -157,7 +161,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / sk %p", sk); seq_printf(m, " / bo %p", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); - seq_printf(m, " / bound %s", bcm_proc_getifname(bo->ifindex)); + seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex)); seq_printf(m, " <<<\n"); list_for_each_entry(op, &bo->rx_ops, list) { @@ -169,7 +173,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) continue; seq_printf(m, "rx_op: %03X %-5s ", - op->can_id, bcm_proc_getifname(op->ifindex)); + op->can_id, bcm_proc_getifname(ifname, op->ifindex)); seq_printf(m, "[%d]%c ", op->nframes, (op->flags & RX_CHECK_DLC)?'d':' '); if (op->kt_ival1.tv64) @@ -194,7 +198,8 @@ static int bcm_proc_show(struct seq_file *m, void *v) list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s [%d] ", - op->can_id, bcm_proc_getifname(op->ifindex), + op->can_id, + bcm_proc_getifname(ifname, op->ifindex), op->nframes); if (op->kt_ival1.tv64) -- cgit v1.2.3