From 035534ed3377d9def2c17717899fd64a111a785b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 6 Aug 2012 18:02:29 +0200 Subject: canfd: remove redundant CAN FD flag The first idea of the CAN FD implementation started with a new struct canfd_frame to be used for both CAN FD frames and legacy CAN frames. The now mainlined implementation supports both CAN frame types simultaneously and distinguishes them only by their required sizes: CAN_MTU and CANFD_MTU. Only the struct canfd_frame contains a flags element which is needed for the additional CAN FD information. As CAN FD implicitly means that the 'Extened Data Length' mode is enabled the formerly defined CANFD_EDL bit became redundant and also confusing as an unset bit would be an error and would always need to be tested. This patch removes the obsolete CANFD_EDL bit and clarifies the documentation for the use of struct canfd_frame and the CAN FD relevant flags. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/can.h b/include/linux/can.h index 018055efc034..e52958d7c2d1 100644 --- a/include/linux/can.h +++ b/include/linux/can.h @@ -74,20 +74,21 @@ struct can_frame { /* * defined bits for canfd_frame.flags * - * As the default for CAN FD should be to support the high data rate in the - * payload section of the frame (HDR) and to support up to 64 byte in the - * data section (EDL) the bits are only set in the non-default case. - * Btw. as long as there's no real implementation for CAN FD network driver - * these bits are only preliminary. + * The use of struct canfd_frame implies the Extended Data Length (EDL) bit to + * be set in the CAN frame bitstream on the wire. The EDL bit switch turns + * the CAN controllers bitstream processor into the CAN FD mode which creates + * two new options within the CAN FD frame specification: * - * RX: NOHDR/NOEDL - info about received CAN FD frame - * ESI - bit from originating CAN controller - * TX: NOHDR/NOEDL - control per-frame settings if supported by CAN controller - * ESI - bit is set by local CAN controller + * Bit Rate Switch - to indicate a second bitrate is/was used for the payload + * Error State Indicator - represents the error state of the transmitting node + * + * As the CANFD_ESI bit is internally generated by the transmitting CAN + * controller only the CANFD_BRS bit is relevant for real CAN controllers when + * building a CAN FD frame for transmission. Setting the CANFD_ESI bit can make + * sense for virtual CAN interfaces to test applications with echoed frames. */ -#define CANFD_NOHDR 0x01 /* frame without high data rate */ -#define CANFD_NOEDL 0x02 /* frame without extended data length */ -#define CANFD_ESI 0x04 /* error state indicator */ +#define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ +#define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ /** * struct canfd_frame - CAN flexible data rate frame structure -- cgit v1.2.3 From a37e6e344910a43b9ebc2bbf29a029f5ea942598 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 10:55:45 +0000 Subject: net: force dst_default_metrics to const section While investigating on network performance problems, I found this little gem : $ nm -v vmlinux | grep -1 dst_default_metrics ffffffff82736540 b busy.46605 ffffffff82736560 B dst_default_metrics ffffffff82736598 b dst_busy_list Apparently, declaring a const array without initializer put it in (writeable) bss section, in middle of possibly often dirtied cache lines. Since we really want dst_default_metrics be const to avoid any possible false sharing and catch any buggy writes, I force a null initializer. ffffffff818a4c20 R dst_default_metrics Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/core/dst.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index baf597890064..621e3513ef5e 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { }; extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[RTAX_MAX]; +extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL #define __DST_METRICS_PTR(Y) \ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..56d63612e1e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) -- cgit v1.2.3 From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 14:11:00 +0000 Subject: net: tcp: ipv6_mapped needs sk_rx_dst_set method commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a regression for ipv6_mapped case. [ 67.422369] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 67.449678] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 92.631060] BUG: unable to handle kernel NULL pointer dereference at (null) [ 92.631435] IP: [< (null)>] (null) [ 92.631645] PGD 0 [ 92.631846] Oops: 0010 [#1] SMP [ 92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd uhci_hcd [ 92.634294] CPU 0 [ 92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3 [ 92.634294] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 92.634294] RSP: 0018:ffff880245fc7cb0 EFLAGS: 00010282 [ 92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX: 0000000000000000 [ 92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI: ffff88024827ad00 [ 92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09: 0000000000000000 [ 92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12: ffff880254735380 [ 92.634294] R13: ffff880254735380 R14: 0000000000000000 R15: 7fffffffffff0218 [ 92.634294] FS: 00007f4516ccd6f0(0000) GS:ffff880256600000(0000) knlGS:0000000000000000 [ 92.634294] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4: 00000000000007f0 [ 92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000, task ffff880254b8cac0) [ 92.634294] Stack: [ 92.634294] ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8 ffff880245fc7d68 [ 92.634294] ffffffff81385083 00000000001d2680 ffff8802547353a8 ffff880245fc7d18 [ 92.634294] ffffffff8105903a ffff88024827ad60 0000000000000002 00000000000000ff [ 92.634294] Call Trace: [ 92.634294] [] ? tcp_finish_connect+0x2c/0xfa [ 92.634294] [] tcp_rcv_state_process+0x2b6/0x9c6 [ 92.634294] [] ? sched_clock_cpu+0xc3/0xd1 [ 92.634294] [] ? local_clock+0x2b/0x3c [ 92.634294] [] tcp_v4_do_rcv+0x63a/0x670 [ 92.634294] [] release_sock+0x128/0x1bd [ 92.634294] [] __inet_stream_connect+0x1b1/0x352 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? wake_up_bit+0x25/0x25 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? inet_stream_connect+0x22/0x4b [ 92.634294] [] inet_stream_connect+0x33/0x4b [ 92.634294] [] sys_connect+0x78/0x9e [ 92.634294] [] ? sysret_check+0x1b/0x56 [ 92.634294] [] ? __audit_syscall_entry+0x195/0x1c8 [ 92.634294] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 92.634294] [] system_call_fastpath+0x16/0x1b [ 92.634294] Code: Bad RIP value. [ 92.634294] RIP [< (null)>] (null) [ 92.634294] RSP [ 92.634294] CR2: 0000000000000000 [ 92.648982] ---[ end trace 24e2bed94314c8d9 ]--- [ 92.649146] Kernel panic - not syncing: Fatal exception in interrupt Fix this using inet_sk_rx_dst_set(), and export this function in case IPv6 is modular. Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd2..1f000ffe7075 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -464,6 +464,7 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 272241f16fcb..767823764016 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } +EXPORT_SYMBOL(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a439e9a4c01..bb9ce2b2f377 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), -- cgit v1.2.3 From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Aug 2012 02:22:47 +0000 Subject: ipv4: fix ip_send_skb() ip_send_skb() can send orphaned skb, so we must pass the net pointer to avoid possible NULL dereference in error path. Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not land outside of TCP stack) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_output.c | 5 ++--- net/ipv4/udp.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444a19ce..5a5d84d3d2c6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -120,7 +120,7 @@ extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); -extern int ip_send_skb(struct sk_buff *skb); +extern int ip_send_skb(struct net *net, struct sk_buff *skb); extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); extern void ip_flush_pending_frames(struct sock *sk); extern struct sk_buff *ip_make_skb(struct sock *sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ec410e08b4b9..147ccc3e93db 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1366,9 +1366,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..6f6d1aca3c3d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), -- cgit v1.2.3 From 2359a47671fc4fb0fe5e9945f76c2cb10792c0f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 20:52:21 +0000 Subject: codel: refine one condition to avoid a nul rec_inv_sqrt One condition before codel_Newton_step() was not good if we never left the dropping state for a flow. As a result rec_inv_sqrt was 0, instead of the ~0 initial value. codel control law was then set to a very aggressive mode, dropping many packets before reaching 'target' and recovering from this problem. To keep codel_vars_init() as efficient as possible, refine the condition to make sure rec_inv_sqrt initial value is correct Many thanks to Anton Mich for discovering the issue and suggesting a fix. Reported-by: Anton Mich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/codel.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/codel.h b/include/net/codel.h index 550debfc2403..389cf621161d 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -305,6 +305,8 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, } } } else if (drop) { + u32 delta; + if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { @@ -320,9 +322,11 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ - if (codel_time_before(now - vars->drop_next, + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = (vars->count - vars->lastcount) | 1; + vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. -- cgit v1.2.3