From 1038a00b458997661bcd0e780a24dc280a8841fc Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 11:42:26 -0800 Subject: Bluetooth: Fallback eSCO to SCO on error 0x1a (Unsupported Remote Feature) General Motors carkits that use LGE BT chipsets return this error code when an eSCO is attempted, despite advertising eSCO support. 2009-08-13 14:41:39.755518 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-08-13 14:41:39.757563 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-08-13 14:41:39.789484 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1a handle 257 bdaddr 00:1E:B2:23:5E:B3 type eSCO Error: Unsupported Remote Feature / Unsupported LMP Feature Signed-off-by: Jaikumar Ganesh Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 28517bad796c..592da5c909c1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1699,6 +1699,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu break; case 0x1c: /* SCO interval rejected */ + case 0x1a: /* Unsupported Remote Feature */ case 0x1f: /* Unspecified error */ if (conn->out && conn->attempt < 2) { conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | -- cgit v1.2.3 From 485f1eff73a7b932fd3abb0dfcf804e1a1f59025 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 3 Feb 2010 15:52:18 -0800 Subject: Bluetooth: Fix sleeping function in RFCOMM within invalid context With the commit 9e726b17422bade75fba94e625cd35fd1353e682 the rfcomm_session_put() gets accidentially called from a timeout callback and results in this: BUG: sleeping function called from invalid context at net/core/sock.c:1897 in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper Pid: 0, comm: swapper Tainted: 
P 2.6.32 #31 Call Trace: [] __might_sleep+0xf8/0xfa [] lock_sock_nested+0x29/0xc4 [] lock_sock+0xb/0xd [l2cap] [] l2cap_sock_shutdown+0x1c/0x76 [l2cap] [] ? clockevents_program_event+0x75/0x7e [] ? tick_dev_program_event+0x37/0xa5 [] l2cap_sock_release+0x27/0x67 [l2cap] [] sock_release+0x1a/0x67 [] rfcomm_session_del+0x34/0x53 [rfcomm] [] rfcomm_session_put+0x14/0x16 [rfcomm] [] rfcomm_session_timeout+0xe/0x1a [rfcomm] [] run_timer_softirq+0x1e2/0x29a [] ? rfcomm_session_timeout+0x0/0x1a [rfcomm] [] __do_softirq+0xfe/0x1c5 [] ? timer_interrupt+0x1a/0x21 [] call_softirq+0x1c/0x28 [] do_softirq+0x33/0x6b [] irq_exit+0x36/0x85 [] do_IRQ+0xa6/0xbd [] ret_from_intr+0x0/0xa [] ? acpi_idle_enter_bm+0x269/0x294 [] ? acpi_idle_enter_bm+0x25f/0x294 [] ? cpuidle_idle_call+0x97/0x107 [] ? cpu_idle+0x53/0xaa [] ? rest_init+0x7a/0x7c [] ? start_kernel+0x389/0x394 [] ? x86_64_start_reservations+0xac/0xb0 [] ? x86_64_start_kernel+0xe4/0xeb To fix this, the rfcomm_session_put() needs to be moved out of rfcomm_session_timeout() into rfcomm_process_sessions(). In that context it is perfectly fine to sleep and disconnect the socket. 
Signed-off-by: Marcel Holtmann Tested-by: David John --- net/bluetooth/rfcomm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fc5ee3296e22..2b506373957a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -252,7 +252,6 @@ static void rfcomm_session_timeout(unsigned long arg) BT_DBG("session %p state %ld", s, s->state); set_bit(RFCOMM_TIMED_OUT, &s->flags); - rfcomm_session_put(s); rfcomm_schedule(RFCOMM_SCHED_TIMEO); } @@ -1920,6 +1919,7 @@ static inline void rfcomm_process_sessions(void) if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_put(s); continue; } -- cgit v1.2.3 From 6c2718da59613d76013b501bf0f8bcf9d7794b2d Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Wed, 3 Feb 2010 16:18:36 -0800 Subject: Bluetooth: Do not call rfcomm_session_put() for RFCOMM UA on closed socket When processing a RFCOMM UA frame when the socket is closed and we were not the RFCOMM initiator would cause rfcomm_session_put() to be called twice during rfcomm_process_rx(). This would cause a kernel panic in rfcomm_session_close() then. This could be easily reproduced during disconnect with devices such as Motorola H270 that send RFCOMM UA followed quickly by L2CAP disconnect request. 
This trace for this looks like: 2009-09-21 17:22:37.788895 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 20 pf 1 ilen 0 fcs 0x7d 2009-09-21 17:22:37.906204 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:37.933090 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 20 pf 1 ilen 0 fcs 0x57 2009-09-21 17:22:38.636764 < ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0041 len 4 [psm 3] RFCOMM(s): DISC: cr 0 dlci 0 pf 1 ilen 0 fcs 0x9c 2009-09-21 17:22:38.744125 > HCI Event: Number of Completed Packets (0x13) plen 5 handle 1 packets 1 2009-09-21 17:22:38.763687 > ACL data: handle 1 flags 0x02 dlen 8 L2CAP(d): cid 0x0040 len 4 [psm 3] RFCOMM(s): UA: cr 0 dlci 0 pf 1 ilen 0 fcs 0xb6 2009-09-21 17:22:38.783554 > ACL data: handle 1 flags 0x02 dlen 12 L2CAP(s): Disconn req: dcid 0x0040 scid 0x0041 Avoid calling rfcomm_session_put() twice by skipping this call in rfcomm_recv_ua() if the socket is closed. Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2b506373957a..89f4a59eb82b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1150,7 +1150,11 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) break; case BT_DISCONN: - rfcomm_session_put(s); + /* When socket is closed and we are not RFCOMM + * initiator rfcomm_process_rx already calls + * rfcomm_session_put() */ + if (s->sock->sk->sk_state != BT_CLOSED) + rfcomm_session_put(s); break; } } -- cgit v1.2.3 From 180211b841b5bf13ab10d19202adab3eb7749f6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 02:53:27 +0000 Subject: af_key: fix netns ops ordering on module load/unload 1. 
After sock_register() returns, it's possible to create sockets, even if module still not initialized fully (blame generic module code for that!) 2. Consequently, pfkey_create() can be called with pfkey_net_id still not initialized which will BUG_ON in net_generic(): kernel BUG at include/net/netns/generic.h:43! 3. During netns shutdown, netns ops should be unregistered after key manager unregistered because key manager calls can be triggered from xfrm_user module: general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC pfkey_broadcast+0x111/0x210 [af_key] pfkey_send_notify+0x16a/0x300 [af_key] km_state_notify+0x41/0x70 xfrm_flush_sa+0x75/0x90 [xfrm_user] 4. Unregister netns ops after socket ops just in case and for symmetry. Reported by Luca Tettamanti. Signed-off-by: Alexey Dobriyan Tested-by: Luca Tettamanti Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/key/af_key.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 76fa6fef6473..539f43bc97db 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3794,9 +3794,9 @@ static struct pernet_operations pfkey_net_ops = { static void __exit ipsec_pfkey_exit(void) { - unregister_pernet_subsys(&pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); + unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } @@ -3807,21 +3807,22 @@ static int __init ipsec_pfkey_init(void) if (err != 0) goto out; - err = sock_register(&pfkey_family_ops); + err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; + err = sock_register(&pfkey_family_ops); + if (err != 0) + goto out_unregister_pernet; err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = register_pernet_subsys(&pfkey_net_ops); - if (err != 0) - goto out_xfrm_unregister_km; out: return err; -out_xfrm_unregister_km: - xfrm_unregister_km(&pfkeyv2_mgr); + 
out_sock_unregister: sock_unregister(PF_KEY); +out_unregister_pernet: + unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; -- cgit v1.2.3 From 974c37e9d88c3e5a3e56eb98cb9c84232eb2bdcb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 30 Jan 2010 10:05:05 +0000 Subject: netlink: fix for too early rmmod Netlink code does module autoload if protocol userspace is asking for is not ready. However, module can dissapear right after it was autoloaded. Example: modprobe/rmmod stress-testing and xfrm_user.ko providing NETLINK_XFRM. netlink_create() in such situation _will_ create userspace socket and _will_not_ pin module. Now if module was removed and we're going to call ->netlink_rcv into nothing: BUG: unable to handle kernel paging request at ffffffffa02f842a ^^^^^^^^^^^^^^^^ modules are loaded near these addresses here IP: [] 0xffffffffa02f842a PGD 161f067 PUD 1623063 PMD baa12067 PTE 0 Oops: 0010 [#1] PREEMPT SMP DEBUG_PAGEALLOC last sysfs file: /sys/devices/pci0000:00/0000:00:1f.2/host0/target0:0:0/0:0:0:0/block/sda/uevent CPU 1 Pid: 11515, comm: ip Not tainted 2.6.33-rc5-netns-00594-gaaa5728-dirty #6 P5E/P5E RIP: 0010:[] [] 0xffffffffa02f842a RSP: 0018:ffff8800baa3db48 EFLAGS: 00010292 RAX: ffff8800baa3dfd8 RBX: ffff8800be353640 RCX: 0000000000000000 RDX: ffffffff81959380 RSI: ffff8800bab7f130 RDI: 0000000000000001 RBP: ffff8800baa3db58 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000011 R13: ffff8800be353640 R14: ffff8800bcdec240 R15: ffff8800bd488010 FS: 00007f93749656f0(0000) GS:ffff880002300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffffffa02f842a CR3: 00000000ba82b000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process ip (pid: 11515, threadinfo ffff8800baa3c000, task ffff8800bab7eb30) 
Stack: ffffffff813637c0 ffff8800bd488000 ffff8800baa3dba8 ffffffff8136397d <0> 0000000000000000 ffffffff81344adc 7fffffffffffffff 0000000000000000 <0> ffff8800baa3ded8 ffff8800be353640 ffff8800bcdec240 0000000000000000 Call Trace: [] ? netlink_unicast+0x100/0x2d0 [] netlink_unicast+0x2bd/0x2d0 netlink_unicast_kernel: nlk->netlink_rcv(skb); [] ? memcpy_fromiovec+0x6c/0x90 [] netlink_sendmsg+0x1d3/0x2d0 [] sock_sendmsg+0xbb/0xf0 [] ? __lock_acquire+0x27b/0xa60 [] ? might_fault+0x73/0xd0 [] ? might_fault+0x73/0xd0 [] ? __lock_release+0x82/0x170 [] ? might_fault+0xbe/0xd0 [] ? might_fault+0x73/0xd0 [] ? verify_iovec+0x47/0xd0 [] sys_sendmsg+0x1a9/0x360 [] ? _raw_spin_unlock_irqrestore+0x65/0x70 [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irqrestore+0x42/0x70 [] ? __up_read+0x84/0xb0 [] ? trace_hardirqs_on_caller+0x145/0x190 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [] 0xffffffffa02f842a RSP CR2: ffffffffa02f842a Return -EPROTONOSUPPORT if the module was quickly removed after autoloading. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a4957bf2ca60..4c5972ba8c78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -455,9 +455,14 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (nl_table[protocol].registered && try_module_get(nl_table[protocol].module)) module = nl_table[protocol].module; + else + err = -EPROTONOSUPPORT; cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); + if (err < 0) + goto out; + err = __netlink_create(net, sock, cb_mutex, protocol); if (err < 0) goto out_module; -- cgit v1.2.3 From 8ed030dd0aa400d18c63861c2c6deb7c38f4edde Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 1 Feb 2010 02:12:19 +0000 Subject: dccp: fix bug in cache allocation This fixes a bug introduced in commit de4ef86cfce60d2250111f34f8a084e769f23b16 ("dccp: fix dccp rmmod when kernel configured to use slub", 17 Jan): the vsnprintf used sizeof(slab_name_fmt), which became truncated to 4 bytes, since slab_name_fmt is now a 4-byte pointer and no longer a 32-character array. This led to error messages such as FATAL: Error inserting dccp: No buffer space available >> kernel: [ 1456.341501] kmem_cache_create: duplicate cache cci generated due to the truncation after the 3rd character. Fixed for the moment by introducing a symbolic constant. Tested to fix the bug. Signed-off-by: Gerrit Renker Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/dccp/ccid.c | 2 +- net/dccp/ccid.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 57dfb9c8c4f2..ff16e9df1969 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -83,7 +83,7 @@ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_f va_list args; va_start(args, fmt); - vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args); + vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args); va_end(args); slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0, diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 269958bf7fe9..6df6f8ac9636 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -19,7 +19,9 @@ #include #include -#define CCID_MAX 255 +/* maximum value for a CCID (RFC 4340, 19.5) */ +#define CCID_MAX 255 +#define CCID_SLAB_NAME_LENGTH 32 struct tcp_info; @@ -49,8 +51,8 @@ struct ccid_operations { const char *ccid_name; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; - char ccid_hc_rx_slab_name[32]; - char ccid_hc_tx_slab_name[32]; + char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH]; + char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH]; __u32 ccid_hc_rx_obj_size, ccid_hc_tx_obj_size; /* Interface Routines */ -- cgit v1.2.3 From 1386be55e32a3c5d8ef4a2b243c530a7b664c02c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 2 Feb 2010 20:16:56 +0000 Subject: dccp: fix auto-loading of dccp(_probe) This fixes commit (38ff3e6bb987ec583268da8eb22628293095d43b) ("dccp_probe: Fix module load dependencies between dccp and dccp_probe", from 15 Jan). It fixes the construction of the first argument of try_then_request_module(), where only valid return codes from the first argument should be returned. What we do now is assign the result of register_jprobe() to ret, without the side effect of the comparison. Acked-by: Gerrit Renker Signed-off-by: Neil Horman Signed-off-by: David S. 
Miller --- net/dccp/probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/probe.c b/net/dccp/probe.c index bace1d8cbcfd..f5b3464f1242 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -161,8 +161,8 @@ static __init int dccpprobe_init(void) if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; - ret = try_then_request_module((register_jprobe(&dccp_send_probe) == 0), - "dccp"); + try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, + "dccp"); if (ret) goto err1; -- cgit v1.2.3 From c390216b3e868b16d8154939f4b6f8c16dbd9a9f Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 13 Nov 2009 14:16:32 -0800 Subject: Bluetooth: Enter active mode before establishing a SCO link. When in sniff mode with a long interval time (1.28s) it can take 4+ seconds to establish a SCO link. Fix by requesting active mode before requesting SCO connection. This improves SCO setup time to ~500ms. Bluetooth headsets that use a long interval time, and exhibit the long SCO connection time include Motorola H790, HX1 and H17. They have a CSR 2.1 chipset. Verified this behavior and fix with host Bluetooth chipsets: BCM4329 and TI1271. 
2009-10-13 14:17:46.183722 > HCI Event: Mode Change (0x14) plen 6 status 0x00 handle 1 mode 0x02 interval 2048 Mode: Sniff 2009-10-13 14:17:53.436285 < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 1 voice setting 0x0060 2009-10-13 14:17:53.445593 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 2009-10-13 14:17:57.788855 > HCI Event: Synchronous Connect Complete 0x2c) plen 17 status 0x00 handle 257 bdaddr 00:1A:0E:F1:A4:7F type eSCO Air mode: CVSD Signed-off-by: Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7c4224f4e7d..b10e3cdb08f8 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -377,6 +377,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (acl->state == BT_CONNECTED && (sco->state == BT_OPEN || sco->state == BT_CLOSED)) { + acl->power_save = 1; + hci_conn_enter_active_mode(acl); + if (lmp_esco_capable(hdev)) hci_setup_sync(sco, acl->handle); else -- cgit v1.2.3 From 454debe45c86102528c90c12eb6a99245b773bfe Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:34 +0000 Subject: irda: unbalanced lock_kernel in irnet_ppp Add the missing unlock_kernel in one ioctl operation. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. 
Miller --- net/irda/irnet/irnet_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 156020d138b5..7c22c126f0ea 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -706,7 +706,8 @@ dev_irnet_ioctl( lock_kernel(); if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - err = 0; + err = 0; + unlock_kernel(); break; /* All these ioctls can be passed both directly and from ppp_generic, -- cgit v1.2.3 From 3fdde0a1602d20c02a7d66e07ab6718ab8d79b12 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Feb 2010 08:21:35 +0000 Subject: irda: add missing BKL in irnet_ppp ioctl One ioctl was forgotten when the BKL was pushed down into the irnet_ppp ioctl function. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c22c126f0ea..6b3602de359a 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -698,9 +698,11 @@ dev_irnet_ioctl( /* Query PPP channel and unit number */ case PPPIOCGCHAN: + lock_kernel(); if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) err = 0; + unlock_kernel(); break; case PPPIOCGUNIT: lock_kernel(); -- cgit v1.2.3 From 1b3f720bf033fde1fbb6231f9b156b918c5f68d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Feb 2010 14:00:41 -0800 Subject: pktgen: Fix freezing problem Add missing try_to_freeze() to one of the pktgen_thread_worker() code paths so that it doesn't block suspend/hibernation. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=15006 Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Ciprian Dorin Craciun Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index de0c2c726420..2e692afdc55d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3524,6 +3524,7 @@ static int pktgen_thread_worker(void *arg) wait_event_interruptible_timeout(t->queue, t->control != 0, HZ/10); + try_to_freeze(); continue; } -- cgit v1.2.3 From 15c697ce1c5b408c5e20dcdc6aea2968d1125b75 Mon Sep 17 00:00:00 2001 From: Michael Poole Date: Fri, 5 Feb 2010 12:23:43 -0500 Subject: Bluetooth: Keep a copy of each HID device's report descriptor The report descriptor is read by user space (via the Service Discovery Protocol), so it is only available during the ioctl to connect. However, the HID probe function that needs the descriptor might not be called until a specific module is loaded. Keep a copy of the descriptor so it is available for later use. Signed-off-by: Michael Poole Signed-off-by: Marcel Holtmann --- net/bluetooth/hidp/core.c | 49 +++++++++++++++++++++++------------------------ net/bluetooth/hidp/hidp.h | 4 +++- 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 6cf526d06e21..fc6ec1e72652 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -703,29 +703,9 @@ static void hidp_close(struct hid_device *hid) static int hidp_parse(struct hid_device *hid) { struct hidp_session *session = hid->driver_data; - struct hidp_connadd_req *req = session->req; - unsigned char *buf; - int ret; - - buf = kmalloc(req->rd_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - return -EFAULT; - } - - ret = hid_parse_report(session->hid, buf, req->rd_size); - - kfree(buf); - - if (ret) - return ret; - - session->req = NULL; - return 0; + return hid_parse_report(session->hid, session->rd_data, + session->rd_size); } static int hidp_start(struct 
hid_device *hid) @@ -770,12 +750,24 @@ static int hidp_setup_hid(struct hidp_session *session, bdaddr_t src, dst; int err; + session->rd_data = kzalloc(req->rd_size, GFP_KERNEL); + if (!session->rd_data) + return -ENOMEM; + + if (copy_from_user(session->rd_data, req->rd_data, req->rd_size)) { + err = -EFAULT; + goto fault; + } + session->rd_size = req->rd_size; + hid = hid_allocate_device(); - if (IS_ERR(hid)) - return PTR_ERR(hid); + if (IS_ERR(hid)) { + err = PTR_ERR(hid); + goto fault; + } session->hid = hid; - session->req = req; + hid->driver_data = session; baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); @@ -806,6 +798,10 @@ failed: hid_destroy_device(hid); session->hid = NULL; +fault: + kfree(session->rd_data); + session->rd_data = NULL; + return err; } @@ -900,6 +896,9 @@ unlink: session->hid = NULL; } + kfree(session->rd_data); + session->rd_data = NULL; + purge: skb_queue_purge(&session->ctrl_transmit); skb_queue_purge(&session->intr_transmit); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index faf3d74c3586..a4e215d50c10 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -154,7 +154,9 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; - struct hidp_connadd_req *req; + /* Report descriptor */ + __u8 *rd_data; + uint rd_size; }; static inline void hidp_schedule(struct hidp_session *session) -- cgit v1.2.3 From 9edd7ca0a3e3999c260642c92fa008892d82ca6e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:16:26 -0800 Subject: netfilter: nf_conntrack: fix memory corruption with multiple namespaces As discovered by Jon Masters , the "untracked" conntrack, which is located in the data section, might be accidentally freed when a new namespace is instantiated while the untracked conntrack is attached to a skb because the reference count it re-initialized. 
The best fix would be to use a separate untracked conntrack per namespace since it includes a namespace pointer. Unfortunately this is not possible without larger changes since the namespace is not easily available everywhere we need it. For now, move the untracked conntrack initialization to the init_net setup function to make sure the reference count is not re-initialized and handle cleanup in the init_net cleanup function to make sure namespaces can exit properly while the untracked conntrack is in use in other namespaces. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..37e2b88313f2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net) static void nf_conntrack_cleanup_init_net(void) { + /* wait until all references to nf_conntrack_untracked are dropped */ + while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + schedule(); + nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); kmem_cache_destroy(nf_conntrack_cachep); @@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net) schedule(); goto i_see_dead_people; } - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) - schedule(); nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); @@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; + /* Set up fake conntrack: to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it 
like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); + return 0; err_helper: @@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net) if (ret < 0) goto err_ecache; - /* Set up fake conntrack: - - to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); - /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return 0; err_ecache: -- cgit v1.2.3 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly move from one hash table (netns ONE) to another one (netns TWO), and a concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We don't have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object cannot escape from one namespace to another one. 
Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd1..aed23b6c8478 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. 
*/ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if 
(!net->ct.nf_conntrack_cachep) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.3 From 13ccdfc2af03e09e60791f7d4bc4ccf53398af7c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:22 -0800 Subject: netfilter: nf_conntrack: restrict runtime expect hashsize modifications Expectation hashtable size was simply glued to a variable with no code to rehash expectations, so it was a bug to allow writing to it. Make "expect_hashsize" readonly. Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index fdf5d2a1d9b4..4ad7d1d809af 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net) #endif /* CONFIG_PROC_FS */ } -module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); +module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); int nf_conntrack_expect_init(struct net *net) { -- cgit v1.2.3 From 14c7dbe043d01a83a30633ab6b109ba2ac61d9f7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 Feb 2010 11:17:43 -0800 Subject: netfilter: xtables: compat out of scope fix As per C99 6.2.4(2) when temporary table data goes out of scope, the behaviour is undefined: if (compat) { struct foo tmp; ... 
private = &tmp; } [dereference private] Signed-off-by: Alexey Dobriyan Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 06632762ba5f..90203e1b9187 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct arpt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..3ce53cf13d5a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ipt_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..8a7e0f52e177 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (t && !IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; - #ifdef CONFIG_COMPAT + struct xt_table_info tmp; + if (compat) { - struct xt_table_info tmp; ret = 
compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; -- cgit v1.2.3 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters, the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instantiating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 10 files changed, 49 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c8478..63d449807d9b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..9a4b8b714079 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 
100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? 
(not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. 
*/ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. 
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. 
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. 
*/ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. */ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - 
nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. */ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ 
b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.3 From 2fc1b5dd99f66d93ffc23fd8df82d384c1a354c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 15:00:39 -0800 Subject: dst: call cond_resched() in dst_gc_task() Kernel bugzilla #15239 On some workloads, it is quite possible to get a huge dst list to process in dst_gc_task(), and trigger soft lockup detection. Fix is to call cond_resched(), as we run in process context. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dst.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 57bc4d5b8d08..cb1b3488b739 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -79,6 +80,7 @@ loop: while ((dst = next) != NULL) { next = dst->next; prefetch(&next->next); + cond_resched(); if (likely(atomic_read(&dst->__refcnt))) { last->next = dst; last = dst; -- cgit v1.2.3 From d4ae20b3799e0b6fa0d832a645a422da9f239868 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 8 Feb 2010 22:41:44 -0800 Subject: net/sched: Fix module name in Kconfig The action modules have been prefixed with 'act_', but the Kconfig description was not changed. Signed-off-by: Jan Luebbe Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 929218a47620..21f9c7678aa3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -433,7 +433,7 @@ config NET_ACT_POLICE module. To compile this code as a module, choose M here: the - module will be called police. + module will be called act_police. config NET_ACT_GACT tristate "Generic actions" @@ -443,7 +443,7 @@ config NET_ACT_GACT accepting packets. To compile this code as a module, choose M here: the - module will be called gact. + module will be called act_gact. config GACT_PROB bool "Probability support" @@ -459,7 +459,7 @@ config NET_ACT_MIRRED other devices. To compile this code as a module, choose M here: the - module will be called mirred. + module will be called act_mirred. config NET_ACT_IPT tristate "IPtables targets" @@ -469,7 +469,7 @@ config NET_ACT_IPT classification. To compile this code as a module, choose M here: the - module will be called ipt. + module will be called act_ipt. 
config NET_ACT_NAT tristate "Stateless NAT" @@ -479,7 +479,7 @@ config NET_ACT_NAT netfilter for NAT unless you know what you are doing. To compile this code as a module, choose M here: the - module will be called nat. + module will be called act_nat. config NET_ACT_PEDIT tristate "Packet Editing" @@ -488,7 +488,7 @@ config NET_ACT_PEDIT Say Y here if you want to mangle the content of packets. To compile this code as a module, choose M here: the - module will be called pedit. + module will be called act_pedit. config NET_ACT_SIMP tristate "Simple Example (Debug)" @@ -502,7 +502,7 @@ config NET_ACT_SIMP If unsure, say N. To compile this code as a module, choose M here: the - module will be called simple. + module will be called act_simple. config NET_ACT_SKBEDIT tristate "SKB Editing" @@ -513,7 +513,7 @@ config NET_ACT_SKBEDIT If unsure, say N. To compile this code as a module, choose M here: the - module will be called skbedit. + module will be called act_skbedit. config NET_CLS_IND bool "Incoming device classification" -- cgit v1.2.3