From d6c416148545bd99d3cc05e672460168245cc156 Mon Sep 17 00:00:00 2001 From: Chang Xiangzhong Date: Thu, 21 Nov 2013 22:56:28 +0100 Subject: net: sctp: find the correct highest_new_tsn in sack Function sctp_check_transmitted(transport t, ...) would iterate all of transport->transmitted queue and looking for the highest __newly__ acked tsn. The original algorithm would depend on the order of the assoc->transport_list (in function sctp_outq_sack line 1215 - 1226). The result might not be the expected due to the order of the tranport_list. Solution: checking if the exising is smaller than the new one before assigning Signed-off-by: Chang Xiangzhong Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 94df75877869..abb6db008df1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1391,7 +1391,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; - *highest_new_tsn_in_sack = tsn; + if (TSN_lt(*highest_new_tsn_in_sack, tsn)) + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v1.2.3 From cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:31:29 +0800 Subject: gro: Only verify TCP checksums for candidates In some cases we may receive IP packets that are longer than their stated lengths. Such packets are never merged in GRO. However, we may end up computing their checksums incorrectly and end up allowing packets with a bogus checksum enter our stack with the checksum status set as verified. Since such packets are rare and not performance-critical, this patch simply skips the checksum verification for them. Reported-by: Alexander Duyck Signed-off-by: Herbert Xu Acked-by: Alexander Duyck Thanks, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 5 +++++ net/ipv6/tcpv6_offload.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a108eae..55aeec930140 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -276,6 +276,10 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, @@ -301,6 +305,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1097c798900..71923d14127a 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -39,6 +39,10 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, @@ -65,6 +69,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } -- cgit v1.2.3 From b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:32:11 +0800 Subject: gro: Clean up tcpX_gro_receive checksum verification This patch simplifies the checksum verification in tcpX_gro_receive by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it does for CHECKSUM_NONE is compute the partial checksum and then treat it as if it came from the hardware (CHECKSUM_COMPLETE). Signed-off-by: Herbert Xu Cheers, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 26 ++++++++++---------------- net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55aeec930140..05606353c7e7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -274,35 +274,29 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * { const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 71923d14127a..6d18157dc32c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: -- cgit v1.2.3 From fb10f802b0fb76079612cb78505cbc9ad81e683b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 22 Nov 2013 14:48:40 +0800 Subject: tcp_memcg: remove useless var old_lim nobody needs it. remove. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154f7e68..269a89ecd2f4 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +70,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; -- cgit v1.2.3 From ca15a078bd907df5fc1c009477869c5cbde3b753 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Fri, 22 Nov 2013 16:23:20 +0100 Subject: sit: generate icmpv6 error when receiving icmpv4 error Send icmpv6 error with type "destination unreachable" and code "address unreachable" when receiving icmpv4 error and sufficient data bytes are available This patch enhances the compliance of sit tunnel with section 3.4 of rfc 4213 Signed-off-by: Oussama Ghorbel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b4a4a953675..8435267836a7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -478,14 +478,44 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -500,7 +530,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -545,6 +574,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.2.3 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- include/net/ping.h | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 217bc5bfc6c6..5a25f36fe3a7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -473,7 +473,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2a5f668cd683..eb198acaac1d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,8 +776,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 3f67704f3747..90f48417b03d 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -31,7 +31,8 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7e..ddf32a6bc415 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f9..840cf1b9e6ee 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505ee..23c3e5b5bb53 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668dd..44dfaa09b584 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c7..d690204a0275 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87fe..a83243c3d656 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df0401..7fb4e14c467f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389b..bcd5699313c3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be60..d9b437e55007 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.3 From 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 07:22:33 +0100 Subject: ipv6: fix leaking uninitialized port number of offender sockaddr Offenders don't have port numbers, so set it to 0. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d690204a0275..8dfe1f4d3c1a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -378,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) -- cgit v1.2.3 From 4d0820cf6a55d72350cb2d24a4504f62fbde95d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Nov 2013 12:59:20 -0800 Subject: sch_tbf: handle too small burst If a too small burst is inadvertently set on TBF, we might trigger a bug in tbf_segment(), as 'skb' instead of 'segs' was used in a qdisc_reshape_fail() call. tc qdisc add dev eth0 root handle 1: tbf latency 50ms burst 1KB rate 50mbit Fix the bug, and add a warning, as such configuration is not going to work anyway for non GSO packets. (For some reason, one has to use a burst >= 1520 to get a working configuration, even with old kernels. This is a probable iproute2/tc bug) Based on a report and initial patch from Yang Yingliang Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") Signed-off-by: Eric Dumazet Reported-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f98595819c..a6090051c5db 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Simple Token Bucket Filter. @@ -117,6 +118,22 @@ struct tbf_sched_data { }; +/* + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +static unsigned int skb_gso_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -136,12 +153,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) while (segs) { nskb = segs->next; segs->next = NULL; - if (likely(segs->len <= q->max_size)) { - qdisc_skb_cb(segs)->pkt_len = segs->len; - ret = qdisc_enqueue(segs, q->qdisc); - } else { - ret = qdisc_reshape_fail(skb, sch); - } + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; @@ -163,7 +176,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } @@ -319,6 +332,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) if (max_size < 0) goto done; + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) -- cgit v1.2.3 From d4360d6fb61af7aa8e027bf5983598d5437737cc Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Fri, 22 Nov 2013 12:51:09 -0600 Subject: be2net: Disabling and enabling interrupts in suspend and resume Interrupts need to be enabled in be_resume, when adapter boots back up from D3cold. disabling interrupts in be_suspend() just to be symmetric to be_resume(). Signed-off-by: Ravikumar Nelavelli Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index abde97471636..9b148cf04704 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4599,6 +4599,7 @@ static int be_suspend(struct pci_dev *pdev, pm_message_t state) if (adapter->wol) be_setup_wol(adapter, true); + be_intr_set(adapter, false); cancel_delayed_work_sync(&adapter->func_recovery_work); netif_device_detach(netdev); @@ -4634,6 +4635,7 @@ static int be_resume(struct pci_dev *pdev) if (status) return status; + be_intr_set(adapter, true); /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); if (status) -- cgit v1.2.3 From 09e83a9d449c1af533321f5bda62b6a59a4098c6 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Fri, 22 Nov 2013 12:51:20 -0600 Subject: be2net: set coalesce-wm in CQ_CREATE_V2 cmd It is not being set currently. (This field is not applicable for Lancer) Signed-off-by: Vasundhara Volam Signed-off-by: Sathya Perla Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index dbcd5262c016..e0e8bc1ef14c 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1032,6 +1032,13 @@ int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq, } else { req->hdr.version = 2; req->page_size = 1; /* 1 for 4K */ + + /* coalesce-wm field in this cmd is not relevant to Lancer. + * Lancer uses COMMON_MODIFY_CQ to set this field + */ + if (!lancer_chip(adapter)) + AMAP_SET_BITS(struct amap_cq_context_v2, coalescwm, + ctxt, coalesce_wm); AMAP_SET_BITS(struct amap_cq_context_v2, nodelay, ctxt, no_delay); AMAP_SET_BITS(struct amap_cq_context_v2, count, ctxt, -- cgit v1.2.3 From 2c7a9dc1641664173211c4ebc5db510a08684c46 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Fri, 22 Nov 2013 12:51:28 -0600 Subject: be2net: Avoid programming permenant MAC by BE3-R VFs On BE3-R, the PF programs the initial MAC address for its VFs. Doing it again in VF probe, causes a FW error which although harmless generates an unnecessary error log message. Signed-off-by: Vasundhara Volam Signed-off-by: Sathya Perla Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index f4825db5d179..5878df619b53 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -503,6 +503,7 @@ struct be_adapter { }; #define be_physfn(adapter) (!adapter->virtfn) +#define be_virtfn(adapter) (adapter->virtfn) #define sriov_enabled(adapter) (adapter->num_vfs > 0) #define sriov_want(adapter) (be_physfn(adapter) && \ (num_vfs || pci_num_vf(adapter->pdev))) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9b148cf04704..78a0e859760b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3253,12 +3253,10 @@ static int be_mac_setup(struct be_adapter *adapter) memcpy(mac, adapter->netdev->dev_addr, ETH_ALEN); } - /* On BE3 VFs this cmd may fail due to lack of privilege. - * Ignore the failure as in this case pmac_id is fetched - * in the IFACE_CREATE cmd. - */ - be_cmd_pmac_add(adapter, mac, adapter->if_handle, - &adapter->pmac_id[0], 0); + /* For BE3-R VFs, the PF programs the initial MAC address */ + if (!(BEx_chip(adapter) && be_virtfn(adapter))) + be_cmd_pmac_add(adapter, mac, adapter->if_handle, + &adapter->pmac_id[0], 0); return 0; } -- cgit v1.2.3 From 2fea6cd303c0d0cd9067da31d873b6a6d5bd75e7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 21 Nov 2013 18:03:07 +0100 Subject: can: sja1000: fix {pre,post}_irq() handling and IRQ handler return value This patch fixes the issue that the sja1000_interrupt() function may have returned IRQ_NONE without processing the optional pre_irq() and post_irq() function before. Further the irq processing counter 'n' is moved to the end of the while statement to return correct IRQ_[NONE|HANDLED] values at error conditions. Reported-by: Wolfgang Grandegger Acked-by: Wolfgang Grandegger Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 7164a999f50f..f17c3018b7c7 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -494,20 +494,20 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) uint8_t isrc, status; int n = 0; - /* Shared interrupts and IRQ off? */ - if (priv->read_reg(priv, SJA1000_IER) == IRQ_OFF) - return IRQ_NONE; - if (priv->pre_irq) priv->pre_irq(priv); + /* Shared interrupts and IRQ off? */ + if (priv->read_reg(priv, SJA1000_IER) == IRQ_OFF) + goto out; + while ((isrc = priv->read_reg(priv, SJA1000_IR)) && (n < SJA1000_MAX_IRQ)) { - n++; + status = priv->read_reg(priv, SJA1000_SR); /* check for absent controller due to hw unplug */ if (status == 0xFF && sja1000_is_absent(priv)) - return IRQ_NONE; + goto out; if (isrc & IRQ_WUI) netdev_warn(dev, "wakeup interrupt\n"); @@ -535,7 +535,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) status = priv->read_reg(priv, SJA1000_SR); /* check for absent controller */ if (status == 0xFF && sja1000_is_absent(priv)) - return IRQ_NONE; + goto out; } } if (isrc & (IRQ_DOI | IRQ_EI | IRQ_BEI | IRQ_EPI | IRQ_ALI)) { @@ -543,8 +543,9 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) if (sja1000_err(dev, isrc, status)) break; } + n++; } - +out: if (priv->post_irq) priv->post_irq(priv); -- cgit v1.2.3 From e35d46adc49b469fd92bdb64fea8af93640e6651 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 24 Nov 2013 23:31:24 +0100 Subject: can: c_can: don't call pm_runtime_get_sync() from interrupt context The c_can driver contians a callpath (c_can_poll -> c_can_state_change -> c_can_get_berr_counter) which may call pm_runtime_get_sync() from the IRQ handler, which is not allowed and results in "BUG: scheduling while atomic". This problem is fixed by introducing __c_can_get_berr_counter, which will not call pm_runtime_get_sync(). Reported-by: Andrew Glen Tested-by: Andrew Glen Signed-off-by: Andrew Glen Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index e3fc07cf2f62..e59c42b446a9 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -712,22 +712,31 @@ static int c_can_set_mode(struct net_device *dev, enum can_mode mode) return 0; } -static int c_can_get_berr_counter(const struct net_device *dev, - struct can_berr_counter *bec) +static int __c_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) { unsigned int reg_err_counter; struct c_can_priv *priv = netdev_priv(dev); - c_can_pm_runtime_get_sync(priv); - reg_err_counter = priv->read_reg(priv, C_CAN_ERR_CNT_REG); bec->rxerr = (reg_err_counter & ERR_CNT_REC_MASK) >> ERR_CNT_REC_SHIFT; bec->txerr = reg_err_counter & ERR_CNT_TEC_MASK; + return 0; +} + +static int c_can_get_berr_counter(const struct net_device *dev, + struct can_berr_counter *bec) +{ + struct c_can_priv *priv = netdev_priv(dev); + int err; + + c_can_pm_runtime_get_sync(priv); + err = __c_can_get_berr_counter(dev, bec); c_can_pm_runtime_put_sync(priv); - return 0; + return err; } /* @@ -872,7 +881,7 @@ static int c_can_handle_state_change(struct net_device *dev, if (unlikely(!skb)) return 0; - c_can_get_berr_counter(dev, &bec); + __c_can_get_berr_counter(dev, &bec); reg_err_counter = priv->read_reg(priv, C_CAN_ERR_CNT_REG); rx_err_passive = (reg_err_counter & ERR_CNT_RP_MASK) >> ERR_CNT_RP_SHIFT; -- cgit v1.2.3 From 7ee330c7b3b738847bf297912b371bbcec3bc994 Mon Sep 17 00:00:00 2001 From: Holger Bechtold Date: Mon, 25 Nov 2013 15:48:19 +0100 Subject: can: c_can: fix calculation of transmitted bytes on tx complete The number of bytes transmitted was not updated correctly, if several CAN messages (with different length) were transmitted in one 'bunch'. Thus programs like 'ifconfig' showed wrong transmit byte counts. Reason was, that the message object whose DLC is to be read was not necessarily the active one at the time when priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, 0)) & IF_MCONT_DLC_MASK; was executed. Signed-off-by: Holger Bechtold Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index e59c42b446a9..77061eebb034 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -763,6 +763,7 @@ static void c_can_do_tx(struct net_device *dev) if (!(val & (1 << (msg_obj_no - 1)))) { can_get_echo_skb(dev, msg_obj_no - C_CAN_MSG_OBJ_TX_FIRST); + c_can_object_get(dev, 0, msg_obj_no, IF_COMM_ALL); stats->tx_bytes += priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, 0)) & IF_MCONT_DLC_MASK; -- cgit v1.2.3 From 1a3e5173f5e72cbf7f0c8927b33082e361c16d72 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 25 Nov 2013 22:15:20 +0100 Subject: can: flexcan: use correct clock as base for bit rate calculation The flexcan IP core uses the peripheral clock ("per") as basic clock for the bit timing calculation. However the driver uses the the wrong clock ("ipg"). This leads to wrong bit rates if the rates on both clock are different. This patch fixes the problem by using the correct clock for the bit rate calculation. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ae08cf129ebb..aaed97bee471 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1020,13 +1020,13 @@ static int flexcan_probe(struct platform_device *pdev) dev_err(&pdev->dev, "no ipg clock defined\n"); return PTR_ERR(clk_ipg); } - clock_freq = clk_get_rate(clk_ipg); clk_per = devm_clk_get(&pdev->dev, "per"); if (IS_ERR(clk_per)) { dev_err(&pdev->dev, "no per clock defined\n"); return PTR_ERR(clk_per); } + clock_freq = clk_get_rate(clk_per); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- cgit v1.2.3 From 31e20bad8d5871c6f8673c28ea26ab7a0e520086 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 28 Nov 2013 18:31:05 +0100 Subject: diag: warn about missing first netlink attribute The first netlink attribute (value 0) must always be defined as none/unspec. This is correctly done in inet_diag.h, but other diag interfaces are wrong. Because we cannot change an existing API, I add a comment to point the mistake and avoid to propagate it in a new diag API in the future. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/netlink_diag.h | 1 + include/uapi/linux/packet_diag.h | 1 + include/uapi/linux/unix_diag.h | 1 + 3 files changed, 3 insertions(+) diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h index 4e31db4eea41..f2159d30d1f5 100644 --- a/include/uapi/linux/netlink_diag.h +++ b/include/uapi/linux/netlink_diag.h @@ -33,6 +33,7 @@ struct netlink_diag_ring { }; enum { + /* NETLINK_DIAG_NONE, standard nl API requires this attribute! */ NETLINK_DIAG_MEMINFO, NETLINK_DIAG_GROUPS, NETLINK_DIAG_RX_RING, diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index b2cc0cd9c4d9..d08c63f3dd6f 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -29,6 +29,7 @@ struct packet_diag_msg { }; enum { + /* PACKET_DIAG_NONE, standard nl API requires this attribute! */ PACKET_DIAG_INFO, PACKET_DIAG_MCLIST, PACKET_DIAG_RX_RING, diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b9e2a6a7446f..1eb0b8dd1830 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -31,6 +31,7 @@ struct unix_diag_msg { }; enum { + /* UNIX_DIAG_NONE, standard nl API requires this attribute! */ UNIX_DIAG_NAME, UNIX_DIAG_VFS, UNIX_DIAG_PEER, -- cgit v1.2.3 From fe9d04afe9bee0ec37a9724937443b2c0e39ce4b Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 22 Nov 2013 22:28:43 +0800 Subject: bonding: disable arp and enable mii monitoring when bond change to no uses arp mode Because the ARP monitoring is not support for 802.3ad, but I still could change the mode to 802.3ad from ab mode while ARP monitoring is running, it is incorrect. So add a check for 802.3ad in bonding_store_mode to fix the problem, and make a new macro BOND_NO_USES_ARP() to simplify the code. v2: according to the Dan Williams's suggestion, bond mode is the most important bond option, it should override any of the other sub-options. So when the mode is changed, the conficting values should be cleared or reset, otherwise the user has to duplicate more operations to modify the logic. I disable the arp and enable mii monitoring when the bond mode is changed to AB, TB and 8023AD if the arp interval is true. v3: according to the Nik's suggestion, the default value of miimon should need a name, there is several place to use it, and the bond_store_arp_interval() could use micro BOND_NO_USES_ARP to make the code more simpify. Suggested-by: Dan Williams Suggested-by: Nikolay Aleksandrov Signed-off-by: Ding Tianhong Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/bonding/bond_options.c | 13 +++++++++---- drivers/net/bonding/bond_sysfs.c | 4 +--- drivers/net/bonding/bonding.h | 7 +++++++ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4dd5ee2a34cc..36eab0c4fb33 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4110,7 +4110,7 @@ static int bond_check_params(struct bond_params *params) if (!miimon) { pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); pr_warning("Forcing miimon to 100msec\n"); - miimon = 100; + miimon = BOND_DEFAULT_MIIMON; } } @@ -4147,7 +4147,7 @@ static int bond_check_params(struct bond_params *params) if (!miimon) { pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n"); pr_warning("Forcing miimon to 100msec\n"); - miimon = 100; + miimon = BOND_DEFAULT_MIIMON; } } diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 9a5223c7b4d1..ea6f640782b7 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -45,10 +45,15 @@ int bond_option_mode_set(struct bonding *bond, int mode) return -EPERM; } - if (BOND_MODE_IS_LB(mode) && bond->params.arp_interval) { - pr_err("%s: %s mode is incompatible with arp monitoring.\n", - bond->dev->name, bond_mode_tbl[mode].modename); - return -EINVAL; + if (BOND_NO_USES_ARP(mode) && bond->params.arp_interval) { + pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n", + bond->dev->name, bond_mode_tbl[mode].modename); + /* disable arp monitoring */ + bond->params.arp_interval = 0; + /* set miimon to default value */ + bond->params.miimon = BOND_DEFAULT_MIIMON; + pr_info("%s: Setting MII monitoring interval to %d.\n", + bond->dev->name, bond->params.miimon); } /* don't cache arp_validate between modes */ diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 0ec2a7e8c8a9..abf5e106edc5 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -523,9 +523,7 @@ static ssize_t bonding_store_arp_interval(struct device *d, ret = -EINVAL; goto out; } - if (bond->params.mode == BOND_MODE_ALB || - bond->params.mode == BOND_MODE_TLB || - bond->params.mode == BOND_MODE_8023AD) { + if (BOND_NO_USES_ARP(bond->params.mode)) { pr_info("%s: ARP monitoring cannot be used with ALB/TLB/802.3ad. Only MII monitoring is supported on %s.\n", bond->dev->name, bond->dev->name); ret = -EINVAL; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index ca31286aa028..a9f4f9f4d8ce 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -35,6 +35,8 @@ #define BOND_MAX_ARP_TARGETS 16 +#define BOND_DEFAULT_MIIMON 100 + #define IS_UP(dev) \ ((((dev)->flags & IFF_UP) == IFF_UP) && \ netif_running(dev) && \ @@ -55,6 +57,11 @@ ((mode) == BOND_MODE_TLB) || \ ((mode) == BOND_MODE_ALB)) +#define BOND_NO_USES_ARP(mode) \ + (((mode) == BOND_MODE_8023AD) || \ + ((mode) == BOND_MODE_TLB) || \ + ((mode) == BOND_MODE_ALB)) + #define TX_QUEUE_OVERRIDE(mode) \ (((mode) == BOND_MODE_ACTIVEBACKUP) || \ ((mode) == BOND_MODE_ROUNDROBIN)) -- cgit v1.2.3 From a2f4dfba9e736ebe339d560cb92678280b1eb5db Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 22 Nov 2013 22:07:57 -0800 Subject: tg3: Convert to use hwmon_device_register_with_groups Use new hwmon API to simplify code, provide missing mandatory 'name' sysfs attribute, and attach hwmon attributes to hwmon device instead of pci device. Signed-off-by: Guenter Roeck Reviewed-by: Jean Delvare Acked-by: Nithin Nayak Sujir Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a9e068423ba0..369b736dde05 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10629,10 +10629,8 @@ static void tg3_sd_scan_scratchpad(struct tg3 *tp, struct tg3_ocir *ocir) static ssize_t tg3_show_temp(struct device *dev, struct device_attribute *devattr, char *buf) { - struct pci_dev *pdev = to_pci_dev(dev); - struct net_device *netdev = pci_get_drvdata(pdev); - struct tg3 *tp = netdev_priv(netdev); struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); + struct tg3 *tp = dev_get_drvdata(dev); u32 temperature; spin_lock_bh(&tp->lock); @@ -10650,29 +10648,25 @@ static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, tg3_show_temp, NULL, static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, tg3_show_temp, NULL, TG3_TEMP_MAX_OFFSET); -static struct attribute *tg3_attributes[] = { +static struct attribute *tg3_attrs[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_max.dev_attr.attr, NULL }; - -static const struct attribute_group tg3_group = { - .attrs = tg3_attributes, -}; +ATTRIBUTE_GROUPS(tg3); static void tg3_hwmon_close(struct tg3 *tp) { if (tp->hwmon_dev) { hwmon_device_unregister(tp->hwmon_dev); tp->hwmon_dev = NULL; - sysfs_remove_group(&tp->pdev->dev.kobj, &tg3_group); } } static void tg3_hwmon_open(struct tg3 *tp) { - int i, err; + int i; u32 size = 0; struct pci_dev *pdev = tp->pdev; struct tg3_ocir ocirs[TG3_SD_NUM_RECS]; @@ -10690,18 +10684,11 @@ static void tg3_hwmon_open(struct tg3 *tp) if (!size) return; - /* Register hwmon sysfs hooks */ - err = sysfs_create_group(&pdev->dev.kobj, &tg3_group); - if (err) { - dev_err(&pdev->dev, "Cannot create sysfs group, aborting\n"); - return; - } - - tp->hwmon_dev = hwmon_device_register(&pdev->dev); + tp->hwmon_dev = hwmon_device_register_with_groups(&pdev->dev, "tg3", + tp, tg3_groups); if (IS_ERR(tp->hwmon_dev)) { tp->hwmon_dev = NULL; dev_err(&pdev->dev, "Cannot register hwmon device, aborting\n"); - sysfs_remove_group(&pdev->dev.kobj, &tg3_group); } } -- cgit v1.2.3 From 0f0e2159c0c101426b705d70f43b9f423d51c869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 23 Nov 2013 13:01:50 +0100 Subject: genetlink: Fix uninitialized variable in genl_validate_assign_mc_groups() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netlink/genetlink.c: In function ‘genl_validate_assign_mc_groups’: net/netlink/genetlink.c:217: warning: ‘err’ may be used uninitialized in this function Commit 2a94fe48f32ccf7321450a2cc07f2b724a444e5b ("genetlink: make multicast groups const, prevent abuse") split genl_register_mc_group() in multiple functions, but dropped the initialization of err. Initialize err to zero to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 4518a57aa5fe..803206e82450 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -214,7 +214,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; - int err, i; + int err = 0, i; bool groups_allocated = false; if (!n_groups) -- cgit v1.2.3 From 5e53e689b737526308db2b5c9f56e9d0371a1676 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 Nov 2013 21:09:26 +0100 Subject: genetlink/pmcraid: use proper genetlink multicast API The pmcraid driver is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make it use the correct API, but since this may already be used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. My previous patch broke event delivery in the driver as I missed that it wasn't using the right API and forgot to update it later in my series. While changing this, I noticed that the genetlink code could use the static group ID instead of a strcmp(), so also do that for the VFS_DQUOT family. Cc: Anil Ravindranath Cc: "James E.J. Bottomley" Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/scsi/pmcraid.c | 20 +++++++++++++++----- include/uapi/linux/genetlink.h | 1 + net/netlink/genetlink.c | 11 +++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index bd6f743d87a7..892ea6161376 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1404,11 +1404,22 @@ enum { }; #define PMCRAID_AEN_CMD_MAX (__PMCRAID_AEN_CMD_MAX - 1) +static struct genl_multicast_group pmcraid_mcgrps[] = { + { .name = "events", /* not really used - see ID discussion below */ }, +}; + static struct genl_family pmcraid_event_family = { - .id = GENL_ID_GENERATE, + /* + * Due to prior multicast group abuse (the code having assumed that + * the family ID can be used as a multicast group ID) we need to + * statically allocate a family (and thus group) ID. + */ + .id = GENL_ID_PMCRAID, .name = "pmcraid", .version = 1, - .maxattr = PMCRAID_AEN_ATTR_MAX + .maxattr = PMCRAID_AEN_ATTR_MAX, + .mcgrps = pmcraid_mcgrps, + .n_mcgrps = ARRAY_SIZE(pmcraid_mcgrps), }; /** @@ -1511,9 +1522,8 @@ static int pmcraid_notify_aen( return result; } - result = - genlmsg_multicast(&pmcraid_event_family, skb, 0, - pmcraid_event_family.id, GFP_ATOMIC); + result = genlmsg_multicast(&pmcraid_event_family, skb, + 0, 0, GFP_ATOMIC); /* If there are no listeners, genlmsg_multicast may return non-zero * value. diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 1af72d8228e0..c3363ba1ae05 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -28,6 +28,7 @@ struct genlmsghdr { #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) /************************************************************************** * Controller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 803206e82450..713671ae45af 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -74,9 +74,12 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | - BIT(GENL_ID_VFS_DQUOT); + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -139,6 +142,7 @@ static u16 genl_generate_id(void) for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) @@ -236,9 +240,12 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); - } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); -- cgit v1.2.3 From 6eabca54d6781f61c7318517c1463a098acb7a87 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Mon, 25 Nov 2013 11:26:57 +0800 Subject: sctp: Restore 'resent' bit to avoid retransmitted chunks for RTT measurements Currently retransmitted DATA chunks could also be used for RTT measurements since there are no flag to identify whether the transmitted DATA chunk is a new one or a retransmitted one. This problem is introduced by commit ae19c5486 ("sctp: remove 'resent' bit from the chunk") which inappropriately removed the 'resent' bit completely, instead of doing this, we should set the resent bit only for the retransmitted DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2174d8da0770..ea0ca5f6e629 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -629,6 +629,7 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978daf27..0e2644d0a773 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -474,10 +474,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abb6db008df1..f51ba985a36e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -446,6 +446,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1375,6 +1377,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.3 From 167f76a87743dfaf1fec954db5233715ddad49ef Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Mon, 25 Nov 2013 12:40:49 +0800 Subject: phy: Add Vitesse 8514 phy ID Phy is compatible with Vitesse 82xx Signed-off-by: Shaohui Xie Signed-off-by: David S. Miller --- drivers/net/phy/vitesse.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 508e4359338b..14372c65a7e8 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -64,6 +64,7 @@ #define PHY_ID_VSC8234 0x000fc620 #define PHY_ID_VSC8244 0x000fc6c0 +#define PHY_ID_VSC8514 0x00070670 #define PHY_ID_VSC8574 0x000704a0 #define PHY_ID_VSC8662 0x00070660 #define PHY_ID_VSC8221 0x000fc550 @@ -131,6 +132,7 @@ static int vsc82xx_config_intr(struct phy_device *phydev) err = phy_write(phydev, MII_VSC8244_IMASK, (phydev->drv->phy_id == PHY_ID_VSC8234 || phydev->drv->phy_id == PHY_ID_VSC8244 || + phydev->drv->phy_id == PHY_ID_VSC8514 || phydev->drv->phy_id == PHY_ID_VSC8574) ? MII_VSC8244_IMASK_MASK : MII_VSC8221_IMASK_MASK); @@ -245,6 +247,18 @@ static struct phy_driver vsc82xx_driver[] = { .ack_interrupt = &vsc824x_ack_interrupt, .config_intr = &vsc82xx_config_intr, .driver = { .owner = THIS_MODULE,}, +}, { + .phy_id = PHY_ID_VSC8514, + .name = "Vitesse VSC8514", + .phy_id_mask = 0x000ffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &vsc824x_config_init, + .config_aneg = &vsc82x4_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc824x_ack_interrupt, + .config_intr = &vsc82xx_config_intr, + .driver = { .owner = THIS_MODULE,}, }, { .phy_id = PHY_ID_VSC8574, .name = "Vitesse VSC8574", @@ -315,6 +329,7 @@ module_exit(vsc82xx_exit); static struct mdio_device_id __maybe_unused vitesse_tbl[] = { { PHY_ID_VSC8234, 0x000ffff0 }, { PHY_ID_VSC8244, 0x000fffc0 }, + { PHY_ID_VSC8514, 0x000ffff0 }, { PHY_ID_VSC8574, 0x000ffff0 }, { PHY_ID_VSC8662, 0x000ffff0 }, { PHY_ID_VSC8221, 0x000ffff0 }, -- cgit v1.2.3 From cd3e22b75c01fe1e075d91504b29019a05d8d373 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Nov 2013 17:19:04 +0800 Subject: macvtap: fix tx_dropped counting error After commit 8ffab51b3dfc54876f145f15b351c41f3f703195 (macvlan: lockless tx path), tx stat counter were converted to percpu stat structure. So we need use to this also for tx_dropped in macvtap. Otherwise, the management won't notice the dropping packet in macvtap tx path. Cc: Michael S. Tsirkin Cc: Vlad Yasevich Cc: Eric Dumazet Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index dc76670c2f2a..0605da8e25f5 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -744,7 +744,7 @@ err: rcu_read_lock(); vlan = rcu_dereference(q->vlan); if (vlan) - vlan->dev->stats.tx_dropped++; + this_cpu_inc(vlan->pcpu_stats->tx_dropped); rcu_read_unlock(); return err; -- cgit v1.2.3 From 66028310aedc782e3a9a221f1a9ec12b7e587e50 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 25 Nov 2013 17:21:24 +0800 Subject: sit: use kfree_skb to replace dev_kfree_skb In failure case, we should use kfree_skb not dev_kfree_skb to free skbuff, dev_kfree_skb is defined as consume_skb. Trace takes advantage of this point. Signed-off-by: Gao feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8435267836a7..366fbba3359a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -951,7 +951,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -977,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; @@ -1017,7 +1017,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, tx_err: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.3 From ae5e8127b712313ec1b99356019ce9226fea8b88 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Mon, 25 Nov 2013 16:52:34 +0000 Subject: xen-netback: include definition of csum_ipv6_magic We are now using csum_ipv6_magic, include the appropriate header. Avoids the following error: drivers/net/xen-netback/netback.c:1313:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration] tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, Signed-off-by: Andy Whitcroft Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 919b6509455c..64f0e0d18b81 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -39,6 +39,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From bc9627e7e918a85e906c1a3f6d01d9b8ef911a96 Mon Sep 17 00:00:00 2001 From: françois romieu Date: Tue, 26 Nov 2013 00:40:58 +0100 Subject: via-velocity: fix netif_receive_skb use in irq disabled section. 2fdac010bdcf10a30711b6924612dfc40daf19b8 ("via-velocity.c: update napi implementation") overlooked an irq disabling spinlock when the Rx part of the NAPI poll handler was converted from netif_rx to netif_receive_skb. NAPI Rx processing can be taken out of the locked section with a pair of napi_{disable / enable} since it only races with the MTU change function. An heavier rework of the NAPI locking would be able to perform NAPI Tx before Rx where I simply removed one of velocity_tx_srv calls. References: https://bugzilla.redhat.com/show_bug.cgi?id=1022733 Fixes: 2fdac010bdcf (via-velocity.c: update napi implementation) Signed-off-by: Francois Romieu Tested-by: Alex A. Schmidt Cc: Jamie Heilman Cc: Michele Baldessari Cc: Julia Lawall Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-velocity.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index d022bf936572..ad61d26a44f3 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2172,16 +2172,13 @@ static int velocity_poll(struct napi_struct *napi, int budget) unsigned int rx_done; unsigned long flags; - spin_lock_irqsave(&vptr->lock, flags); /* * Do rx and tx twice for performance (taken from the VIA * out-of-tree driver). */ - rx_done = velocity_rx_srv(vptr, budget / 2); - velocity_tx_srv(vptr); - rx_done += velocity_rx_srv(vptr, budget - rx_done); + rx_done = velocity_rx_srv(vptr, budget); + spin_lock_irqsave(&vptr->lock, flags); velocity_tx_srv(vptr); - /* If budget not fully consumed, exit the polling mode */ if (rx_done < budget) { napi_complete(napi); @@ -2342,6 +2339,8 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) if (ret < 0) goto out_free_tmp_vptr_1; + napi_disable(&vptr->napi); + spin_lock_irqsave(&vptr->lock, flags); netif_stop_queue(dev); @@ -2362,6 +2361,8 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) velocity_give_many_rx_descs(vptr); + napi_enable(&vptr->napi); + mac_enable_int(vptr->mac_regs); netif_start_queue(dev); -- cgit v1.2.3 From 39af0c409e8c06b51caf7dc43e49ef96ae790a55 Mon Sep 17 00:00:00 2001 From: Baker Zhang Date: Tue, 26 Nov 2013 09:29:15 +0800 Subject: net: remove outdated comment for ipv4 and ipv6 protocol handler since f9242b6b28d61295f2bf7e8adfb1060b382e5381 inet: Sanitize inet{,6} protocol demux. there are not pretended hash tables for ipv4 or ipv6 protocol handler. Signed-off-by: Baker Zhang Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 8 -------- net/ipv6/protocol.c | 4 ---- 2 files changed, 12 deletions(-) diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce848461acbb..46d6a1c923a8 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670da..e048cf1bb6a2 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; -- cgit v1.2.3 From 55485e7b417b640870b14eceec4cfbcb2b3e7a92 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 26 Nov 2013 17:54:41 +0100 Subject: be2net: call napi_disable() for all event queues The recent be2net commit 6384a4d (adds a support for busy polling) introduces a regression that results in kernel crash. It incorrectly modified be_close() so napi_disable() is called only for the first queue. This breaks a correct pairing of napi_enable/_disable for the rest of event queues and causes a crash in subsequent be_open() call. Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Signed-off-by: Ivan Vecera Acked-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 78a0e859760b..0aec4d23eb58 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2663,8 +2663,8 @@ static int be_close(struct net_device *netdev) napi_disable(&eqo->napi); be_disable_busy_poll(eqo); } - adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; } + adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; be_async_mcc_disable(adapter); -- cgit v1.2.3 From 71237b6feb8361a00a1cb481a0c901fbee8f82a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Nov 2013 18:53:36 -0500 Subject: Revert "be2net: call napi_disable() for all event queues" This reverts commit 55485e7b417b640870b14eceec4cfbcb2b3e7a92. I applied the wrong version of this patch, the right one is coming up next. Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0aec4d23eb58..78a0e859760b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2663,8 +2663,8 @@ static int be_close(struct net_device *netdev) napi_disable(&eqo->napi); be_disable_busy_poll(eqo); } + adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; } - adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; be_async_mcc_disable(adapter); -- cgit v1.2.3 From dff345c5c85df3c5853491b007f6b1b578dc28a4 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 27 Nov 2013 08:59:32 +0100 Subject: be2net: call napi_disable() for all event queues The recent be2net commit 6384a4d (adds a support for busy polling) introduces a regression that results in kernel crash. It incorrectly modified be_close() so napi_disable() is called only for the first queue. This breaks a correct pairing of napi_enable/_disable for the rest of event queues and causes a crash in subsequent be_open() call. v2: Applied suggestions from Sathya Fixes: 6384a4d ("be2net: add support for ndo_busy_poll") Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Signed-off-by: Ivan Vecera Acked-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 78a0e859760b..fee64bf10446 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2658,8 +2658,8 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - for_all_evt_queues(adapter, eqo, i) { - if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { + if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { + for_all_evt_queues(adapter, eqo, i) { napi_disable(&eqo->napi); be_disable_busy_poll(eqo); } -- cgit v1.2.3 From 006da7b07bc4d3a7ffabad17cf639eec6849c9dc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 26 Nov 2013 12:37:12 -0500 Subject: macvtap: Do not double-count received packets Currently macvlan will count received packets after calling each vlans receive handler. Macvtap attempts to count the packet yet again when the user reads the packet from the tap socket. This code doesn't do this consistently either. Remove the counting from macvtap and let only macvlan count received packets. Signed-off-by: Vlad Yasevich Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0605da8e25f5..9093004f9b63 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -767,7 +767,6 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, const struct sk_buff *skb, const struct iovec *iv, int len) { - struct macvlan_dev *vlan; int ret; int vnet_hdr_len = 0; int vlan_offset = 0; @@ -821,15 +820,6 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, copied += len; done: - rcu_read_lock(); - vlan = rcu_dereference(q->vlan); - if (vlan) { - preempt_disable(); - macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0); - preempt_enable(); - } - rcu_read_unlock(); - return ret ? ret : copied; } -- cgit v1.2.3 From ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 29 Nov 2013 09:53:23 +0100 Subject: af_packet: block BH in prb_shutdown_retire_blk_timer() Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), however the timer might fire right in the middle and thus try to re-aquire the same spinlock, leaving us in a endless loop. To fix that, use the spin_lock_bh() to block it. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") CC: "David S. Miller" CC: Daniel Borkmann CC: Willem de Bruijn CC: Phil Sutter CC: Eric Dumazet Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Veaceslav Falico Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ac27c86ef6d1..ba2548bd85bf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -439,9 +439,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } -- cgit v1.2.3 From db31c55a6fb245fdbb752a2ca4aefec89afabb06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Nov 2013 15:40:21 +0300 Subject: net: clamp ->msg_namelen instead of returning an error If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the original code that would lead to memory corruption in the kernel if you had audit configured. If you didn't have audit configured it was harmless. There are some programs such as beta versions of Ruby which use too large of a buffer and returning an error code breaks them. We should clamp the ->msg_namelen value instead. Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") Reported-by: Eric Wong Signed-off-by: Dan Carpenter Tested-by: Eric Wong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index 618c6a8a911b..dd32e34c1e2c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index 0b18693f2be6..e83c416708af 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1973,7 +1973,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } -- cgit v1.2.3 From 1bac1072425c86f1ac85bd5967910706677ef8b3 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 27 Nov 2013 15:48:36 +0800 Subject: r8169: check ALDPS bit and disable it if enabled for the 8168g Windows driver will enable ALDPS function, but linux driver and firmware do not have any configuration related to ALDPS function for 8168g. So restart system to linux and remove the NIC cable, LAN enter ALDPS, then LAN RX will be disabled. This issue can be easily reproduced on dual boot windows and linux system with RTL_GIGA_MAC_VER_40 chip. Realtek said, ALDPS function can be disabled by configuring to PHY, switch to page 0x0A43, reg0x10 bit2=0. Signed-off-by: David Chang Acked-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 799387570766..c737f0ea5de7 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -3465,6 +3465,11 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x9065); rtl_writephy(tp, 0x14, 0x1065); + /* Check ALDPS bit, disable it if enabled */ + rtl_writephy(tp, 0x1f, 0x0a43); + if (rtl_readphy(tp, 0x10) & 0x0004) + rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0004); + rtl_writephy(tp, 0x1f, 0x0000); } -- cgit v1.2.3 From 7fe0ee099ad5e3dea88d4ee1b6f20246b1ca57c3 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 27 Nov 2013 14:32:52 +0800 Subject: net: 8139cp: fix a BUG_ON triggered by wrong bytes_compl Using iperf to send packets(GSO mode is on), a bug is triggered: [ 212.672781] kernel BUG at lib/dynamic_queue_limits.c:26! [ 212.673396] invalid opcode: 0000 [#1] SMP [ 212.673882] Modules linked in: 8139cp(O) nls_utf8 edd fuse loop dm_mod ipv6 i2c_piix4 8139too i2c_core intel_agp joydev pcspkr hid_generic intel_gtt floppy sr_mod mii button sg cdrom ext3 jbd mbcache usbhid hid uhci_hcd ehci_hcd usbcore sd_mod usb_common crc_t10dif crct10dif_common processor thermal_sys hwmon scsi_dh_emc scsi_dh_rdac scsi_dh_hp_sw scsi_dh ata_generic ata_piix libata scsi_mod [last unloaded: 8139cp] [ 212.676084] CPU: 0 PID: 4124 Comm: iperf Tainted: G O 3.12.0-0.7-default+ #16 [ 212.676084] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 212.676084] task: ffff8800d83966c0 ti: ffff8800db4c8000 task.ti: ffff8800db4c8000 [ 212.676084] RIP: 0010:[] [] dql_completed+0x17f/0x190 [ 212.676084] RSP: 0018:ffff880116e03e30 EFLAGS: 00010083 [ 212.676084] RAX: 00000000000005ea RBX: 0000000000000f7c RCX: 0000000000000002 [ 212.676084] RDX: ffff880111dd0dc0 RSI: 0000000000000bd4 RDI: ffff8800db6ffcc0 [ 212.676084] RBP: ffff880116e03e48 R08: 0000000000000992 R09: 0000000000000000 [ 212.676084] R10: ffffffff8181e400 R11: 0000000000000004 R12: 000000000000000f [ 212.676084] R13: ffff8800d94ec840 R14: ffff8800db440c80 R15: 000000000000000e [ 212.676084] FS: 00007f6685a3c700(0000) GS:ffff880116e00000(0000) knlGS:0000000000000000 [ 212.676084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 212.676084] CR2: 00007f6685ad6460 CR3: 00000000db714000 CR4: 00000000000006f0 [ 212.676084] Stack: [ 212.676084] ffff8800db6ffc00 000000000000000f ffff8800d94ec840 ffff880116e03eb8 [ 212.676084] ffffffffa041509f ffff880116e03e88 0000000f16e03e88 ffff8800d94ec000 [ 212.676084] 00000bd400059858 000000050000000f ffffffff81094c36 ffff880116e03eb8 [ 212.676084] Call Trace: [ 212.676084] [ 212.676084] [] cp_interrupt+0x4ef/0x590 [8139cp] [ 212.676084] [] ? ktime_get+0x56/0xd0 [ 212.676084] [] handle_irq_event_percpu+0x53/0x170 [ 212.676084] [] handle_irq_event+0x3c/0x60 [ 212.676084] [] handle_fasteoi_irq+0x55/0xf0 [ 212.676084] [] handle_irq+0x1f/0x30 [ 212.676084] [] do_IRQ+0x5b/0xe0 [ 212.676084] [] common_interrupt+0x6a/0x6a [ 212.676084] [ 212.676084] [] ? cp_start_xmit+0x621/0x97c [8139cp] [ 212.676084] [] ? cp_start_xmit+0x609/0x97c [8139cp] [ 212.676084] [] dev_hard_start_xmit+0x2c9/0x550 [ 212.676084] [] sch_direct_xmit+0x179/0x1d0 [ 212.676084] [] dev_queue_xmit+0x293/0x440 [ 212.676084] [] ip_finish_output+0x236/0x450 [ 212.676084] [] ? __alloc_pages_nodemask+0x187/0xb10 [ 212.676084] [] ip_output+0x88/0x90 [ 212.676084] [] ip_local_out+0x24/0x30 [ 212.676084] [] ip_queue_xmit+0x14d/0x3e0 [ 212.676084] [] tcp_transmit_skb+0x501/0x840 [ 212.676084] [] tcp_write_xmit+0x1e3/0xb20 [ 212.676084] [] ? skb_page_frag_refill+0x87/0xd0 [ 212.676084] [] tcp_push_one+0x2b/0x40 [ 212.676084] [] tcp_sendmsg+0x926/0xc90 [ 212.676084] [] inet_sendmsg+0x61/0xc0 [ 212.676084] [] sock_aio_write+0x101/0x120 [ 212.676084] [] ? vma_adjust+0x2e1/0x5d0 [ 212.676084] [] ? timerqueue_add+0x60/0xb0 [ 212.676084] [] do_sync_write+0x60/0x90 [ 212.676084] [] ? rw_verify_area+0x54/0xf0 [ 212.676084] [] vfs_write+0x186/0x190 [ 212.676084] [] SyS_write+0x5d/0xa0 [ 212.676084] [] system_call_fastpath+0x16/0x1b [ 212.676084] Code: ca 41 89 dc 41 29 cc 45 31 db 29 c2 41 89 c5 89 d0 45 29 c5 f7 d0 c1 e8 1f e9 43 ff ff ff 66 0f 1f 44 00 00 31 c0 e9 7b ff ff ff <0f> 0b eb fe 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 c7 47 40 00 [ 212.676084] RIP [] dql_completed+0x17f/0x190 ------------[ cut here ]------------ When a skb has frags, bytes_compl plus skb->len nr_frags times in cp_tx(). It's not the correct value(actually, it should plus skb->len once) and it will trigger the BUG_ON(bytes_compl > num_queued - dql->num_completed). So only increase bytes_compl when finish sending all frags. pkts_compl also has a wrong value, fix it too. It's introduced by commit 871f0d4c ("8139cp: enable bql"). Suggested-by: Eric Dumazet Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/8139cp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index f2a2128165dd..737c1a881f78 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -678,9 +678,6 @@ static void cp_tx (struct cp_private *cp) le32_to_cpu(txd->opts1) & 0xffff, PCI_DMA_TODEVICE); - bytes_compl += skb->len; - pkts_compl++; - if (status & LastFrag) { if (status & (TxError | TxFIFOUnder)) { netif_dbg(cp, tx_err, cp->dev, @@ -702,6 +699,8 @@ static void cp_tx (struct cp_private *cp) netif_dbg(cp, tx_done, cp->dev, "tx done, slot %d\n", tx_tail); } + bytes_compl += skb->len; + pkts_compl++; dev_kfree_skb_irq(skb); } -- cgit v1.2.3 From b268daffdcb9762ad9aa3898096570a9dd92aa9b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Nov 2013 13:33:21 +0100 Subject: net: smc91: fix crash regression on the versatile After commit e9e4ea74f06635f2ffc1dffe5ef40c854faa0a90 "net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data" The Versatile SMSC LAN91C111 is crashing like this: ------------[ cut here ]------------ kernel BUG at /home/linus/linux/drivers/net/ethernet/smsc/smc91x.c:599! Internal error: Oops - BUG: 0 [#1] ARM Modules linked in: CPU: 0 PID: 43 Comm: udhcpc Not tainted 3.13.0-rc1+ #24 task: c6ccfaa0 ti: c6cd0000 task.ti: c6cd0000 PC is at smc_hardware_send_pkt+0x198/0x22c LR is at smc_hardware_send_pkt+0x24/0x22c pc : [] lr : [] psr: 20000013 sp : c6cd1d08 ip : 00000001 fp : 00000000 r10: c02adb08 r9 : 00000000 r8 : c6ced802 r7 : c786fba0 r6 : 00000146 r5 : c8800000 r4 : c78d6000 r3 : 0000000f r2 : 00000146 r1 : 00000000 r0 : 00000031 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 0005317f Table: 06cf4000 DAC: 00000015 Process udhcpc (pid: 43, stack limit = 0xc6cd01c0) Stack: (0xc6cd1d08 to 0xc6cd2000) 1d00: 00000010 c8800000 c78d6000 c786fba0 c78d6000 c01be868 1d20: c01be7a4 00004000 00000000 c786fba0 c6c12b80 c0208554 000004d0 c780fc60 1d40: 00000220 c01fb734 00000000 00000000 00000000 c6c9a440 c6c12b80 c78d6000 1d60: c786fba0 c6c9a440 00000000 c021d1d8 00000000 00000000 c6c12b80 c78d6000 1d80: c786fba0 00000001 c6c9a440 c02087f8 c6c9a4a0 00080008 00000000 00000000 1da0: c78d6000 c786fba0 c78d6000 00000138 00000000 00000000 00000000 00000000 1dc0: 00000000 c027ba74 00000138 00000138 00000001 00000010 c6cedc00 00000000 1de0: 00000008 c7404400 c6cd1eec c6cd1f14 c067a73c c065c0b8 00000000 c067a740 1e00: 01ffffff 002040d0 00000000 00000000 00000000 00000000 00000000 ffffffff 1e20: 43004400 00110022 c6cdef20 c027ae8c c6ccfaa0 be82d65c 00000014 be82d3cc 1e40: 00000000 00000000 00000000 c01f2870 00000000 00000000 00000000 c6cd1e88 1e60: c6ccfaa0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1e80: 00000000 00000000 00000031 c7802310 c7802300 00000138 c7404400 c0771da0 1ea0: 00000000 c6cd1eec c7800340 00000138 be82d65c 00000014 be82d3cc c6cd1f08 1ec0: 00000014 00000000 c7404400 c7404400 00000138 c01f4628 c78d6000 00000000 1ee0: 00000000 be82d3cc 00000138 c6cd1f08 00000014 c6cd1ee4 00000001 00000000 1f00: 00000000 00000000 00080011 00000002 06000000 ffffffff 0000ffff 00000002 1f20: 06000000 ffffffff 0000ffff c00928c8 c065c520 c6cd1f58 00000003 c009299c 1f40: 00000003 c065c520 c7404400 00000000 c7404400 c01f2218 c78106b0 c7441cb0 1f60: 00000000 00000006 c06799fc 00000000 00000000 00000006 00000000 c01f3ee0 1f80: 00000000 00000000 be82d678 be82d65c 00000014 00000001 00000122 c00139c8 1fa0: c6cd0000 c0013840 be82d65c 00000014 00000006 be82d3cc 00000138 00000000 1fc0: be82d65c 00000014 00000001 00000122 00000000 00000000 00018cb1 00000000 1fe0: 00003801 be82d3a8 0003a0c7 b6e9af08 60000010 00000006 00000000 00000000 [] (smc_hardware_send_pkt+0x198/0x22c) from [] (smc_hard_start_xmit+0xc4/0x1e8) [] (smc_hard_start_xmit+0xc4/0x1e8) from [] (dev_hard_start_xmit+0x460/0x4cc) [] (dev_hard_start_xmit+0x460/0x4cc) from [] (sch_direct_xmit+0x94/0x18c) [] (sch_direct_xmit+0x94/0x18c) from [] (dev_queue_xmit+0x238/0x42c) [] (dev_queue_xmit+0x238/0x42c) from [] (packet_sendmsg+0xbe8/0xd28) [] (packet_sendmsg+0xbe8/0xd28) from [] (sock_sendmsg+0x84/0xa8) [] (sock_sendmsg+0x84/0xa8) from [] (SyS_sendto+0xb8/0xdc) [] (SyS_sendto+0xb8/0xdc) from [] (ret_fast_syscall+0x0/0x2c) Code: e3130002 1a000001 e3130001 0affffcd (e7f001f2) ---[ end trace 81104fe70e8da7fe ]--- Kernel panic - not syncing: Fatal exception in interrupt This is because the macro operations in smc91x.h defined for Versatile are missing SMC_outsw() as used in this commit. The Versatile needs and uses the same accessors as the other platforms in the first if(...) clause, just switch it to using that and we have one problem less to worry about. Checkpatch complains about spacing, but I have opted to follow the style of this .h-file. Cc: Russell King Cc: Nicolas Pitre Cc: Eric Miao Cc: Jonathan Cameron Cc: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index c9d4c872e81d..902a7c268d82 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -46,7 +46,8 @@ defined(CONFIG_MACH_LITTLETON) ||\ defined(CONFIG_MACH_ZYLONITE2) ||\ defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) + defined(CONFIG_MACH_STARGATE2) ||\ + defined(CONFIG_ARCH_VERSATILE) #include @@ -206,23 +207,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 -#elif defined(CONFIG_ARCH_VERSATILE) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - #elif defined(CONFIG_MN10300) /* -- cgit v1.2.3 From 9d38d28bd211451533d7a0d1de57485aaf86d00a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Nov 2013 16:23:14 -0500 Subject: Revert "net: smc91: fix crash regression on the versatile" This reverts commit b268daffdcb9762ad9aa3898096570a9dd92aa9b. I applied the wrong version of this patch, the proper version is coming up next. Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index 902a7c268d82..c9d4c872e81d 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -46,8 +46,7 @@ defined(CONFIG_MACH_LITTLETON) ||\ defined(CONFIG_MACH_ZYLONITE2) ||\ defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) ||\ - defined(CONFIG_ARCH_VERSATILE) + defined(CONFIG_MACH_STARGATE2) #include @@ -207,6 +206,23 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 +#elif defined(CONFIG_ARCH_VERSATILE) + +#define SMC_CAN_USE_8BIT 1 +#define SMC_CAN_USE_16BIT 1 +#define SMC_CAN_USE_32BIT 1 +#define SMC_NOWAIT 1 + +#define SMC_inb(a, r) readb((a) + (r)) +#define SMC_inw(a, r) readw((a) + (r)) +#define SMC_inl(a, r) readl((a) + (r)) +#define SMC_outb(v, a, r) writeb(v, (a) + (r)) +#define SMC_outw(v, a, r) writew(v, (a) + (r)) +#define SMC_outl(v, a, r) writel(v, (a) + (r)) +#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) +#define SMC_IRQ_FLAGS (-1) /* from resource */ + #elif defined(CONFIG_MN10300) /* -- cgit v1.2.3 From a0c20fb02592d372e744d1d739cda3e1b3defaae Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 28 Nov 2013 14:33:52 +0100 Subject: net: smc91: fix crash regression on the versatile After commit e9e4ea74f06635f2ffc1dffe5ef40c854faa0a90 "net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data" The Versatile SMSC LAN91C111 is crashing like this: ------------[ cut here ]------------ kernel BUG at /home/linus/linux/drivers/net/ethernet/smsc/smc91x.c:599! Internal error: Oops - BUG: 0 [#1] ARM Modules linked in: CPU: 0 PID: 43 Comm: udhcpc Not tainted 3.13.0-rc1+ #24 task: c6ccfaa0 ti: c6cd0000 task.ti: c6cd0000 PC is at smc_hardware_send_pkt+0x198/0x22c LR is at smc_hardware_send_pkt+0x24/0x22c pc : [] lr : [] psr: 20000013 sp : c6cd1d08 ip : 00000001 fp : 00000000 r10: c02adb08 r9 : 00000000 r8 : c6ced802 r7 : c786fba0 r6 : 00000146 r5 : c8800000 r4 : c78d6000 r3 : 0000000f r2 : 00000146 r1 : 00000000 r0 : 00000031 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 0005317f Table: 06cf4000 DAC: 00000015 Process udhcpc (pid: 43, stack limit = 0xc6cd01c0) Stack: (0xc6cd1d08 to 0xc6cd2000) 1d00: 00000010 c8800000 c78d6000 c786fba0 c78d6000 c01be868 1d20: c01be7a4 00004000 00000000 c786fba0 c6c12b80 c0208554 000004d0 c780fc60 1d40: 00000220 c01fb734 00000000 00000000 00000000 c6c9a440 c6c12b80 c78d6000 1d60: c786fba0 c6c9a440 00000000 c021d1d8 00000000 00000000 c6c12b80 c78d6000 1d80: c786fba0 00000001 c6c9a440 c02087f8 c6c9a4a0 00080008 00000000 00000000 1da0: c78d6000 c786fba0 c78d6000 00000138 00000000 00000000 00000000 00000000 1dc0: 00000000 c027ba74 00000138 00000138 00000001 00000010 c6cedc00 00000000 1de0: 00000008 c7404400 c6cd1eec c6cd1f14 c067a73c c065c0b8 00000000 c067a740 1e00: 01ffffff 002040d0 00000000 00000000 00000000 00000000 00000000 ffffffff 1e20: 43004400 00110022 c6cdef20 c027ae8c c6ccfaa0 be82d65c 00000014 be82d3cc 1e40: 00000000 00000000 00000000 c01f2870 00000000 00000000 00000000 c6cd1e88 1e60: c6ccfaa0 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1e80: 00000000 00000000 00000031 c7802310 c7802300 00000138 c7404400 c0771da0 1ea0: 00000000 c6cd1eec c7800340 00000138 be82d65c 00000014 be82d3cc c6cd1f08 1ec0: 00000014 00000000 c7404400 c7404400 00000138 c01f4628 c78d6000 00000000 1ee0: 00000000 be82d3cc 00000138 c6cd1f08 00000014 c6cd1ee4 00000001 00000000 1f00: 00000000 00000000 00080011 00000002 06000000 ffffffff 0000ffff 00000002 1f20: 06000000 ffffffff 0000ffff c00928c8 c065c520 c6cd1f58 00000003 c009299c 1f40: 00000003 c065c520 c7404400 00000000 c7404400 c01f2218 c78106b0 c7441cb0 1f60: 00000000 00000006 c06799fc 00000000 00000000 00000006 00000000 c01f3ee0 1f80: 00000000 00000000 be82d678 be82d65c 00000014 00000001 00000122 c00139c8 1fa0: c6cd0000 c0013840 be82d65c 00000014 00000006 be82d3cc 00000138 00000000 1fc0: be82d65c 00000014 00000001 00000122 00000000 00000000 00018cb1 00000000 1fe0: 00003801 be82d3a8 0003a0c7 b6e9af08 60000010 00000006 00000000 00000000 [] (smc_hardware_send_pkt+0x198/0x22c) from [] (smc_hard_start_xmit+0xc4/0x1e8) [] (smc_hard_start_xmit+0xc4/0x1e8) from [] (dev_hard_start_xmit+0x460/0x4cc) [] (dev_hard_start_xmit+0x460/0x4cc) from [] (sch_direct_xmit+0x94/0x18c) [] (sch_direct_xmit+0x94/0x18c) from [] (dev_queue_xmit+0x238/0x42c) [] (dev_queue_xmit+0x238/0x42c) from [] (packet_sendmsg+0xbe8/0xd28) [] (packet_sendmsg+0xbe8/0xd28) from [] (sock_sendmsg+0x84/0xa8) [] (sock_sendmsg+0x84/0xa8) from [] (SyS_sendto+0xb8/0xdc) [] (SyS_sendto+0xb8/0xdc) from [] (ret_fast_syscall+0x0/0x2c) Code: e3130002 1a000001 e3130001 0affffcd (e7f001f2) ---[ end trace 81104fe70e8da7fe ]--- Kernel panic - not syncing: Fatal exception in interrupt This is because the macro operations in smc91x.h defined for Versatile are missing SMC_outsw() as used in this commit. The Versatile needs and uses the same accessors as the other platforms in the first if(...) clause, just switch it to using that and we have one problem less to worry about. This includes a hunk of a patch from Will Deacon fixin the other 32bit platforms as well: Innokom, Ramses, PXA, PCM027. Checkpatch complains about spacing, but I have opted to follow the style of this .h-file. Cc: Russell King Cc: Nicolas Pitre Cc: Eric Miao Cc: Jonathan Cameron Cc: stable@vger.kernel.org Signed-off-by: Will Deacon Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index c9d4c872e81d..749654b976bc 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -46,7 +46,8 @@ defined(CONFIG_MACH_LITTLETON) ||\ defined(CONFIG_MACH_ZYLONITE2) ||\ defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) + defined(CONFIG_MACH_STARGATE2) ||\ + defined(CONFIG_ARCH_VERSATILE) #include @@ -154,6 +155,8 @@ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_outl(v, a, r) writel(v, (a) + (r)) #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) #define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) +#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) +#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) #define SMC_IRQ_FLAGS (-1) /* from resource */ /* We actually can't write halfwords properly if not word aligned */ @@ -206,23 +209,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 -#elif defined(CONFIG_ARCH_VERSATILE) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - #elif defined(CONFIG_MN10300) /* -- cgit v1.2.3 From 85493e6dd42dcaf0eaf0a19f12f1295e35cc3b7f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 27 Nov 2013 18:54:31 -0800 Subject: sfc: Convert to use hwmon_device_register_with_groups Simplify the code. Avoid race conditions caused by attributes being created after hwmon device registration. Implicitly (through hwmon API) add mandatory 'name' sysfs attribute. Reviewed-by: Ben Hutchings Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi.h | 2 + drivers/net/ethernet/sfc/mcdi_mon.c | 78 ++++++++++++++----------------------- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 656a3277c2b2..15816cacb548 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -75,6 +75,8 @@ struct efx_mcdi_mon { unsigned long last_update; struct device *device; struct efx_mcdi_mon_attribute *attrs; + struct attribute_group group; + const struct attribute_group *groups[2]; unsigned int n_attrs; }; diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c index 4cc5d95b2a5a..d72ad4fc3617 100644 --- a/drivers/net/ethernet/sfc/mcdi_mon.c +++ b/drivers/net/ethernet/sfc/mcdi_mon.c @@ -139,17 +139,10 @@ static int efx_mcdi_mon_update(struct efx_nic *efx) return rc; } -static ssize_t efx_mcdi_mon_show_name(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%s\n", KBUILD_MODNAME); -} - static int efx_mcdi_mon_get_entry(struct device *dev, unsigned int index, efx_dword_t *entry) { - struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_nic *efx = dev_get_drvdata(dev->parent); struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); int rc; @@ -263,7 +256,7 @@ static ssize_t efx_mcdi_mon_show_label(struct device *dev, efx_mcdi_sensor_type[mon_attr->type].label); } -static int +static void efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name, ssize_t (*reader)(struct device *, struct device_attribute *, char *), @@ -272,7 +265,6 @@ efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name, { struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); struct efx_mcdi_mon_attribute *attr = &hwmon->attrs[hwmon->n_attrs]; - int rc; strlcpy(attr->name, name, sizeof(attr->name)); attr->index = index; @@ -286,10 +278,7 @@ efx_mcdi_mon_add_attr(struct efx_nic *efx, const char *name, attr->dev_attr.attr.name = attr->name; attr->dev_attr.attr.mode = S_IRUGO; attr->dev_attr.show = reader; - rc = device_create_file(&efx->pci_dev->dev, &attr->dev_attr); - if (rc == 0) - ++hwmon->n_attrs; - return rc; + hwmon->group.attrs[hwmon->n_attrs++] = &attr->dev_attr.attr; } int efx_mcdi_mon_probe(struct efx_nic *efx) @@ -338,26 +327,22 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) efx_mcdi_mon_update(efx); /* Allocate space for the maximum possible number of - * attributes for this set of sensors: name of the driver plus + * attributes for this set of sensors: * value, min, max, crit, alarm and label for each sensor. */ - n_attrs = 1 + 6 * n_sensors; + n_attrs = 6 * n_sensors; hwmon->attrs = kcalloc(n_attrs, sizeof(*hwmon->attrs), GFP_KERNEL); if (!hwmon->attrs) { rc = -ENOMEM; goto fail; } - - hwmon->device = hwmon_device_register(&efx->pci_dev->dev); - if (IS_ERR(hwmon->device)) { - rc = PTR_ERR(hwmon->device); + hwmon->group.attrs = kcalloc(n_attrs + 1, sizeof(struct attribute *), + GFP_KERNEL); + if (!hwmon->group.attrs) { + rc = -ENOMEM; goto fail; } - rc = efx_mcdi_mon_add_attr(efx, "name", efx_mcdi_mon_show_name, 0, 0, 0); - if (rc) - goto fail; - for (i = 0, j = -1, type = -1; ; i++) { enum efx_hwmon_type hwmon_type; const char *hwmon_prefix; @@ -372,7 +357,7 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) page = type / 32; j = -1; if (page == n_pages) - return 0; + goto hwmon_register; MCDI_SET_DWORD(inbuf, SENSOR_INFO_EXT_IN_PAGE, page); @@ -453,28 +438,22 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) if (min1 != max1) { snprintf(name, sizeof(name), "%s%u_input", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_value, i, type, 0); - if (rc) - goto fail; if (hwmon_type != EFX_HWMON_POWER) { snprintf(name, sizeof(name), "%s%u_min", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_limit, i, type, min1); - if (rc) - goto fail; } snprintf(name, sizeof(name), "%s%u_max", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_limit, i, type, max1); - if (rc) - goto fail; if (min2 != max2) { /* Assume max2 is critical value. @@ -482,32 +461,38 @@ int efx_mcdi_mon_probe(struct efx_nic *efx) */ snprintf(name, sizeof(name), "%s%u_crit", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_limit, i, type, max2); - if (rc) - goto fail; } } snprintf(name, sizeof(name), "%s%u_alarm", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_alarm, i, type, 0); - if (rc) - goto fail; if (type < ARRAY_SIZE(efx_mcdi_sensor_type) && efx_mcdi_sensor_type[type].label) { snprintf(name, sizeof(name), "%s%u_label", hwmon_prefix, hwmon_index); - rc = efx_mcdi_mon_add_attr( + efx_mcdi_mon_add_attr( efx, name, efx_mcdi_mon_show_label, i, type, 0); - if (rc) - goto fail; } } +hwmon_register: + hwmon->groups[0] = &hwmon->group; + hwmon->device = hwmon_device_register_with_groups(&efx->pci_dev->dev, + KBUILD_MODNAME, NULL, + hwmon->groups); + if (IS_ERR(hwmon->device)) { + rc = PTR_ERR(hwmon->device); + goto fail; + } + + return 0; + fail: efx_mcdi_mon_remove(efx); return rc; @@ -516,14 +501,11 @@ fail: void efx_mcdi_mon_remove(struct efx_nic *efx) { struct efx_mcdi_mon *hwmon = efx_mcdi_mon(efx); - unsigned int i; - for (i = 0; i < hwmon->n_attrs; i++) - device_remove_file(&efx->pci_dev->dev, - &hwmon->attrs[i].dev_attr); - kfree(hwmon->attrs); if (hwmon->device) hwmon_device_unregister(hwmon->device); + kfree(hwmon->attrs); + kfree(hwmon->group.attrs); efx_nic_free_buffer(efx, &hwmon->dma_buf); } -- cgit v1.2.3 From d3f7d56a7a4671d395e8af87071068a195257bf6 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Cc: # 3.4.x + 3.2.x Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller --- crypto/algif_hash.c | 3 +++ crypto/algif_skcipher.c | 3 +++ net/ipv4/udp.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index ef5356cd280a..850246206b12 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6a6dfc062d2a..a19c027b29bd 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44dfaa09b584..bf2b85b25491 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1098,6 +1098,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; -- cgit v1.2.3 From f5e0d34382e18f396d7673a84df8e3342bea7eb6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 28 Nov 2013 18:01:38 +0100 Subject: team: fix master carrier set when user linkup is enabled When user linkup is enabled and user sets linkup of individual port, we need to recompute linkup (carrier) of master interface so the change is reflected. Fix this by calling __team_carrier_check() which does the needed work. Please apply to all stable kernels as well. Thanks. Reported-by: Jan Tluka Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 34b0de09d881..736050d6b451 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1366,6 +1366,8 @@ static int team_user_linkup_option_get(struct team *team, return 0; } +static void __team_carrier_check(struct team *team); + static int team_user_linkup_option_set(struct team *team, struct team_gsetter_ctx *ctx) { @@ -1373,6 +1375,7 @@ static int team_user_linkup_option_set(struct team *team, port->user.linkup = ctx->data.bool_val; team_refresh_port_linkup(port); + __team_carrier_check(port->team); return 0; } @@ -1392,6 +1395,7 @@ static int team_user_linkup_en_option_set(struct team *team, port->user.linkup_enabled = ctx->data.bool_val; team_refresh_port_linkup(port); + __team_carrier_check(port->team); return 0; } -- cgit v1.2.3 From f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: inet: fix possible seqlock deadlocks In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 840cf1b9e6ee..242e7f4ed6f4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -772,7 +772,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b90cd9..067213924751 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bf2b85b25491..44f6a20fa29d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } -- cgit v1.2.3 From 42ce4126d8bc2e128e1f207cf79bb0623fac498f Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Fri, 8 Nov 2013 01:54:07 +0000 Subject: igb: Fixed Wake On LAN support This patch fixes Wake on LAN being reported as supported on some Ethernet ports, in contrary to Hardware capability. Signed-off-by: Akeem G Abodunrin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index b0f3666b1d7f..c3143da497c8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2062,14 +2062,15 @@ static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct igb_adapter *adapter = netdev_priv(netdev); - wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC | - WAKE_PHY; wol->wolopts = 0; if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) return; + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; + /* apply any specific unsupported masks here */ switch (adapter->hw.device_id) { default: -- cgit v1.2.3 From 6a7d64e3e09e11181a07a2e8cd6af5d6355133be Mon Sep 17 00:00:00 2001 From: yzhu1 Date: Sat, 23 Nov 2013 07:07:40 +0000 Subject: e1000: prevent oops when adapter is being closed and reset simultaneously This change is based on a similar change made to e1000e support in commit bb9e44d0d0f4 ("e1000e: prevent oops when adapter is being closed and reset simultaneously"). The same issue has also been observed on the older e1000 cards. Here, we have increased the RESET_COUNT value to 50 because there are too many accesses to e1000 nic on stress tests to e1000 nic, it is not enough to set RESET_COUT 25. Experimentation has shown that it is enough to set RESET_COUNT 50. Signed-off-by: yzhu1 Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 5 +++++ drivers/net/ethernet/intel/e1000/e1000_main.c | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 58c147271a36..e4093d1f64cb 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -83,6 +83,11 @@ struct e1000_adapter; #define E1000_MAX_INTR 10 +/* + * Count for polling __E1000_RESET condition every 10-20msec. + */ +#define E1000_CHECK_RESET_COUNT 50 + /* TX/RX descriptor defines */ #define E1000_DEFAULT_TXD 256 #define E1000_MAX_TXD 256 diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index e38622825fa7..c0f52174e4c8 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1440,6 +1440,10 @@ static int e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); @@ -4963,6 +4967,11 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); if (netif_running(netdev)) { + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); + WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); } -- cgit v1.2.3 From b2f963bfaebadc9117b29f806630ea3bcaec403d Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Sat, 23 Nov 2013 07:17:56 +0000 Subject: e1000: fix lockdep warning in e1000_reset_task The patch fixes the following lockdep warning, which is 100% reproducible on network restart: ====================================================== [ INFO: possible circular locking dependency detected ] 3.12.0+ #47 Tainted: GF ------------------------------------------------------- kworker/1:1/27 is trying to acquire lock: ((&(&adapter->watchdog_task)->work)){+.+...}, at: [] flush_work+0x0/0x70 but task is already holding lock: (&adapter->mutex){+.+...}, at: [] e1000_reset_task+0x4a/0xa0 [e1000] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&adapter->mutex){+.+...}: [] lock_acquire+0x9d/0x120 [] mutex_lock_nested+0x4c/0x390 [] e1000_watchdog+0x7d/0x5b0 [e1000] [] process_one_work+0x1d2/0x510 [] worker_thread+0x120/0x3a0 [] kthread+0xee/0x110 [] ret_from_fork+0x7c/0xb0 -> #0 ((&(&adapter->watchdog_task)->work)){+.+...}: [] __lock_acquire+0x1710/0x1810 [] lock_acquire+0x9d/0x120 [] flush_work+0x3b/0x70 [] __cancel_work_timer+0x98/0x140 [] cancel_delayed_work_sync+0x13/0x20 [] e1000_down_and_stop+0x3c/0x60 [e1000] [] e1000_down+0x131/0x220 [e1000] [] e1000_reset_task+0x52/0xa0 [e1000] [] process_one_work+0x1d2/0x510 [] worker_thread+0x120/0x3a0 [] kthread+0xee/0x110 [] ret_from_fork+0x7c/0xb0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&adapter->mutex); lock((&(&adapter->watchdog_task)->work)); lock(&adapter->mutex); lock((&(&adapter->watchdog_task)->work)); *** DEADLOCK *** 3 locks held by kworker/1:1/27: #0: (events){.+.+.+}, at: [] process_one_work+0x166/0x510 #1: ((&adapter->reset_task)){+.+...}, at: [] process_one_work+0x166/0x510 #2: (&adapter->mutex){+.+...}, at: [] e1000_reset_task+0x4a/0xa0 [e1000] stack backtrace: CPU: 1 PID: 27 Comm: kworker/1:1 Tainted: GF 3.12.0+ #47 Hardware name: System manufacturer System Product Name/P5B-VM SE, BIOS 0501 05/31/2007 Workqueue: events e1000_reset_task [e1000] ffffffff820f6000 ffff88007b9dba98 ffffffff816b54a2 0000000000000002 ffffffff820f5e50 ffff88007b9dbae8 ffffffff810ba936 ffff88007b9dbac8 ffff88007b9dbb48 ffff88007b9d8f00 ffff88007b9d8780 ffff88007b9d8f00 Call Trace: [] dump_stack+0x49/0x5f [] print_circular_bug+0x216/0x310 [] __lock_acquire+0x1710/0x1810 [] ? __flush_work+0x250/0x250 [] lock_acquire+0x9d/0x120 [] ? __flush_work+0x250/0x250 [] flush_work+0x3b/0x70 [] ? __flush_work+0x250/0x250 [] __cancel_work_timer+0x98/0x140 [] cancel_delayed_work_sync+0x13/0x20 [] e1000_down_and_stop+0x3c/0x60 [e1000] [] e1000_down+0x131/0x220 [e1000] [] e1000_reset_task+0x52/0xa0 [e1000] [] process_one_work+0x1d2/0x510 [] ? process_one_work+0x166/0x510 [] worker_thread+0x120/0x3a0 [] ? manage_workers+0x2c0/0x2c0 [] kthread+0xee/0x110 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 == The issue background == The problem occurs, because e1000_down(), which is called under adapter->mutex by e1000_reset_task(), tries to synchronously cancel e1000 auxiliary works (reset_task, watchdog_task, phy_info_task, fifo_stall_task), which take adapter->mutex in their handlers. So the question is what does adapter->mutex protect there? The adapter->mutex was introduced by commit 0ef4ee ("e1000: convert to private mutex from rtnl") as a replacement for rtnl_lock() taken in the asynchronous handlers. It targeted on fixing a similar lockdep warning issued when e1000_down() was called under rtnl_lock(), and it fixed it, but unfortunately it introduced the lockdep warning described above. Anyway, that said the source of this bug is that the asynchronous works were made to take rtnl_lock() some time ago, so let's look deeper and find why it was added there. The rtnl_lock() was added to asynchronous handlers by commit 338c15 ("e1000: fix occasional panic on unload") in order to prevent asynchronous handlers from execution after the module is unloaded (e1000_down() is called) as it follows from the comment to the commit: > Net drivers in general have an issue where timers fired > by mod_timer or work threads with schedule_work are running > outside of the rtnl_lock. > > With no other lock protection these routines are vulnerable > to races with driver unload or reset paths. > > The longer term solution to this might be a redesign with > safer locks being taken in the driver to guarantee no > reentrance, but for now a safe and effective fix is > to take the rtnl_lock in these routines. I'm not sure if this locking scheme fixed the problem or just made it unlikely, although I incline to the latter. Anyway, this was long time ago when e1000 auxiliary works were implemented as timers scheduling real work handlers in their routines. The e1000_down() function only canceled the timers, but left the real handlers running if they were running, which could result in work execution after module unload. Today, the e1000 driver uses sane delayed works instead of the pair timer+work to implement its delayed asynchronous handlers, and the e1000_down() synchronously cancels all the works so that the problem that commit 338c15 tried to cope with disappeared, and we don't need any locks in the handlers any more. Moreover, any locking there can potentially result in a deadlock. So, this patch reverts commits 0ef4ee and 338c15. Fixes: 0ef4eedc2e98 ("e1000: convert to private mutex from rtnl") Fixes: 338c15e470d8 ("e1000: fix occasional panic on unload") Cc: Tushar Dave Cc: Patrick McHardy Signed-off-by: Vladimir Davydov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 2 -- drivers/net/ethernet/intel/e1000/e1000_main.c | 36 +++------------------------ 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index e4093d1f64cb..f9313b36c887 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -317,8 +317,6 @@ struct e1000_adapter { struct delayed_work watchdog_task; struct delayed_work fifo_stall_task; struct delayed_work phy_info_task; - - struct mutex mutex; }; enum e1000_state_t { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index c0f52174e4c8..619b0cb60f2f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -544,21 +544,8 @@ void e1000_down(struct e1000_adapter *adapter) e1000_clean_all_rx_rings(adapter); } -static void e1000_reinit_safe(struct e1000_adapter *adapter) -{ - while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) - msleep(1); - mutex_lock(&adapter->mutex); - e1000_down(adapter); - e1000_up(adapter); - mutex_unlock(&adapter->mutex); - clear_bit(__E1000_RESETTING, &adapter->flags); -} - void e1000_reinit_locked(struct e1000_adapter *adapter) { - /* if rtnl_lock is not held the call path is bogus */ - ASSERT_RTNL(); WARN_ON(in_interrupt()); while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -1316,7 +1303,6 @@ static int e1000_sw_init(struct e1000_adapter *adapter) e1000_irq_disable(adapter); spin_lock_init(&adapter->stats_lock); - mutex_init(&adapter->mutex); set_bit(__E1000_DOWN, &adapter->flags); @@ -2329,11 +2315,8 @@ static void e1000_update_phy_info_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, phy_info_task.work); - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - mutex_lock(&adapter->mutex); + e1000_phy_get_info(&adapter->hw, &adapter->phy_info); - mutex_unlock(&adapter->mutex); } /** @@ -2349,9 +2332,6 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; u32 tctl; - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - mutex_lock(&adapter->mutex); if (atomic_read(&adapter->tx_fifo_stall)) { if ((er32(TDT) == er32(TDH)) && (er32(TDFT) == er32(TDFH)) && @@ -2372,7 +2352,6 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) schedule_delayed_work(&adapter->fifo_stall_task, 1); } } - mutex_unlock(&adapter->mutex); } bool e1000_has_link(struct e1000_adapter *adapter) @@ -2426,10 +2405,6 @@ static void e1000_watchdog(struct work_struct *work) struct e1000_tx_ring *txdr = adapter->tx_ring; u32 link, tctl; - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - - mutex_lock(&adapter->mutex); link = e1000_has_link(adapter); if ((netif_carrier_ok(netdev)) && link) goto link_up; @@ -2520,7 +2495,7 @@ link_up: adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); /* exit immediately since reset is imminent */ - goto unlock; + return; } } @@ -2548,9 +2523,6 @@ link_up: /* Reschedule the task */ if (!test_bit(__E1000_DOWN, &adapter->flags)) schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); - -unlock: - mutex_unlock(&adapter->mutex); } enum latency_range { @@ -3499,10 +3471,8 @@ static void e1000_reset_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, reset_task); - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; e_err(drv, "Reset adapter\n"); - e1000_reinit_safe(adapter); + e1000_reinit_locked(adapter); } /** -- cgit v1.2.3 From 74a1b1ea8a30b035aaad833bbd6b9263e72acfac Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Sat, 23 Nov 2013 07:18:01 +0000 Subject: e1000: fix possible reset_task running after adapter down On e1000_down(), we should ensure every asynchronous work is canceled before proceeding. Since the watchdog_task can schedule other works apart from itself, it should be stopped first, but currently it is stopped after the reset_task. This can result in the following race leading to the reset_task running after the module unload: e1000_down_and_stop(): e1000_watchdog(): ---------------------- ----------------- cancel_work_sync(reset_task) schedule_work(reset_task) cancel_delayed_work_sync(watchdog_task) The patch moves cancel_delayed_work_sync(watchdog_task) at the beginning of e1000_down_and_stop() thus ensuring the race is impossible. Cc: Tushar Dave Cc: Patrick McHardy Signed-off-by: Vladimir Davydov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 619b0cb60f2f..46e6544ed1b7 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -494,13 +494,20 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) { set_bit(__E1000_DOWN, &adapter->flags); - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); - cancel_delayed_work_sync(&adapter->watchdog_task); + + /* + * Since the watchdog task can reschedule other tasks, we should cancel + * it first, otherwise we can run into the situation when a work is + * still running after the adapter has been turned down. + */ + cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); + + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) -- cgit v1.2.3 From ae72c8d068b3440c0b45df284303794cfbc0f4f2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 9 Nov 2013 07:11:26 +0000 Subject: ixgbe: ixgbe_fwd_ring_down needs to be static When compiling with -Wstrict-prototypes gcc catches a static I missed. ./ixgbe_main.c:4254: warning: no previous prototype for 'ixgbe_fwd_ring_down' Reported-by: Phillip Schmitt Signed-off-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0c55079ebee3..ad9d67095d52 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4251,8 +4251,8 @@ static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter, rx_ring->l2_accel_priv = NULL; } -int ixgbe_fwd_ring_down(struct net_device *vdev, - struct ixgbe_fwd_adapter *accel) +static int ixgbe_fwd_ring_down(struct net_device *vdev, + struct ixgbe_fwd_adapter *accel) { struct ixgbe_adapter *adapter = accel->real_adapter; unsigned int rxbase = accel->rx_base_queue; -- cgit v1.2.3 From 8bf1264d2f12c6fa8645d097275568c1b62c246c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 12 Nov 2013 12:13:29 +0000 Subject: ixgbe: turn NETIF_F_HW_L2FW_DOFFLOAD off by default NETIF_F_HW_L2FW_DOFFLOAD allows upper layer net devices such as macvlan to use queues in the hardware to directly submit and receive skbs. This creates a subtle change in the datapath though. One change being the skb may no longer use the root devices qdisc. Because users may not expect this we can't enable the feature by default unless the hardware can offload all the software functionality above it. So for now disable it by default and let users opt in. Signed-off-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ad9d67095d52..cc06854296a3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7986,10 +7986,9 @@ skip_sriov: NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | - NETIF_F_RXCSUM | - NETIF_F_HW_L2FW_DOFFLOAD; + NETIF_F_RXCSUM; - netdev->hw_features = netdev->features; + netdev->hw_features = netdev->features | NETIF_F_HW_L2FW_DOFFLOAD; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: -- cgit v1.2.3 From 8821754704a661623083a9dedf425114199600ba Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Sat, 23 Nov 2013 03:19:19 +0000 Subject: ixgbe: Make ixgbe_identify_qsfp_module_generic static Correct a namespace complaint by making the function static and moving the prototype into the .c file. Signed-off-by: Mark Rustad Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index e4c676006be9..39217e5ff7dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -46,6 +46,7 @@ static bool ixgbe_get_i2c_data(u32 *i2cctl); static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw); static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id); static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw); +static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); /** * ixgbe_identify_phy_generic - Get physical layer module @@ -1164,7 +1165,7 @@ err_read_i2c_eeprom: * * Searches for and identifies the QSFP module and assigns appropriate PHY type **/ -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) +static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) { struct ixgbe_adapter *adapter = hw->back; s32 status = IXGBE_ERR_PHY_ADDR_INVALID; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index aae900a256da..fffcbdd2bf0e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -145,7 +145,6 @@ s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw); s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset); -- cgit v1.2.3 From 7f88c6b23afbd31545c676dea77ba9593a1a14bf Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 29 Nov 2013 06:39:44 +0100 Subject: ipv6: fix possible seqlock deadlock in ip6_finish_output2 IPv6 stats are 64 bits and thus are protected with a seqlock. By not disabling bottom-half we could deadlock here if we don't disable bh and a softirq reentrantly updates the same mib. Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59df872e2f4d..4acdb63495db 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3 From 19990e29fedc2fe7056e66da31364978d49ea1e1 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:36:00 +0100 Subject: MAINTAINERS: Added net/hsr/ maintainer Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 63f30484932b..8899c60046eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4044,6 +4044,12 @@ W: http://www.pharscape.org S: Maintained F: drivers/net/usb/hso.c +HSR NETWORK PROTOCOL +M: Arvid Brodin +L: netdev@vger.kernel.org +S: Maintained +F: net/hsr/ + HTCPEN TOUCHSCREEN DRIVER M: Pau Oliva Fora L: linux-input@vger.kernel.org -- cgit v1.2.3 From 213e3bc723e53af0976421d2808ea3f6cc821c56 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:37:07 +0100 Subject: net/hsr: Very small fix of comment style. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 003f5bb3acd2..4bdab1521878 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,7 +288,8 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) */ + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ if ((int) b - a == 32768) return false; -- cgit v1.2.3 From 98bf8362220af717862b8262b21348774890b7b4 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:38:16 +0100 Subject: net/hsr: Support iproute print_opt ('ip -details ...') This implements the rtnl_link_ops fill_info routine for HSR. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 4 +++- net/hsr/hsr_netlink.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b78566f59aba..6db460121f84 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -488,7 +488,9 @@ enum { IFLA_HSR_UNSPEC, IFLA_HSR_SLAVE1, IFLA_HSR_SLAVE2, - IFLA_HSR_MULTICAST_SPEC, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, __IFLA_HSR_MAX, }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5325af85eea6..01a5261ac7a5 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -59,6 +61,31 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec); } +static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (hsr_priv->slave[0]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) + goto nla_put_failure; + + if (hsr_priv->slave[1]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, + hsr_priv->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops hsr_link_ops __read_mostly = { .kind = "hsr", .maxtype = IFLA_HSR_MAX, @@ -66,6 +93,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .fill_info = hsr_fill_info, }; -- cgit v1.2.3 From 7c2781fa92f5b9ca3188817a56a2ced0400355f3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:02:43 -0800 Subject: netem: missing break in ge loss generator There is a missing break statement in the Gilbert Elliot loss model generator which makes state machine behave incorrectly. Reported-by: Martin Burri Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 75c94e59a3bd..6e91323f3dac 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -268,6 +268,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) clg->state = 2; if (net_random() < clg->a4) return true; + break; case 2: if (net_random() < clg->a2) clg->state = 1; -- cgit v1.2.3 From ab6c27be8178a4682446faa5aa017b948997937f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:03:35 -0800 Subject: netem: fix loss 4 state model Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "In the case 1 of the switch statement in the if conditions we need to add clg->a4 to clg->a1, according to the model." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6e91323f3dac..9685624f7bc7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = 4; return true; - } else if (clg->a4 < rnd && rnd < clg->a1) { + } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = 3; return true; - } else if (clg->a1 < rnd) + } else if (clg->a1 + clg->a4 < rnd) clg->state = 1; break; -- cgit v1.2.3 From eff7979f00b2c546f36f6829f4072c8db54763a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:04:26 -0800 Subject: netem: fix gemodel loss generator Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "in case 2, of the switch we change the direction of the inequality to net_random()>clg->a3, because clg->a3 is h in the GE model and when h is 0 all packets will be lost." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9685624f7bc7..bccd52b36e97 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -272,7 +272,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) case 2: if (net_random() < clg->a2) clg->state = 1; - if (clg->a3 > net_random()) + if (net_random() > clg->a3) return true; } -- cgit v1.2.3 From 99e872ae1eacb560152c0123cf1cef571569e681 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 29 Nov 2013 10:02:19 +0100 Subject: virtio_net: Fixed a trivial typo (fitler --> filter) "MAC filter" sounds more reasonable than "MAC fitler". Signed-off-by: Thomas Huth Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7bab4de658a9..acda66169973 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1084,7 +1084,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, sg, NULL)) - dev_warn(&dev->dev, "Failed to set MAC fitler table.\n"); + dev_warn(&dev->dev, "Failed to set MAC filter table.\n"); kfree(buf); } -- cgit v1.2.3 From 8fc3b9e9a229778e5af3aa453c44f1a3857ba769 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 28 Nov 2013 13:30:55 +0200 Subject: virtio_net: fix error handling for mergeable buffers Eric Dumazet noticed that if we encounter an error when processing a mergeable buffer, we don't dequeue all of the buffers from this packet, the result is almost sure to be loss of networking. Jason Wang noticed that we also leak a page and that we don't decrement the rq buf count, so we won't repost buffers (a resource leak). Fix both issues. Cc: Rusty Russell Cc: Michael Dalton Reported-by: Eric Dumazet Reported-by: Jason Wang Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 82 ++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index acda66169973..71a2eac7b039 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -299,35 +299,47 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } -static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) +static struct sk_buff *receive_mergeable(struct net_device *dev, + struct receive_queue *rq, + void *buf, + unsigned int len) { - struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb); + struct skb_vnet_hdr *hdr = buf; + int num_buf = hdr->mhdr.num_buffers; + struct page *page = virt_to_head_page(buf); + int offset = buf - page_address(page); + struct sk_buff *head_skb = page_to_skb(rq, page, offset, len, + MERGE_BUFFER_LEN); struct sk_buff *curr_skb = head_skb; - char *buf; - struct page *page; - int num_buf, len, offset; - num_buf = hdr->mhdr.num_buffers; + if (unlikely(!curr_skb)) + goto err_skb; + while (--num_buf) { - int num_skb_frags = skb_shinfo(curr_skb)->nr_frags; + int num_skb_frags; + buf = virtqueue_get_buf(rq->vq, &len); if (unlikely(!buf)) { - pr_debug("%s: rx error: %d buffers missing\n", - head_skb->dev->name, hdr->mhdr.num_buffers); - head_skb->dev->stats.rx_length_errors++; - return -EINVAL; + pr_debug("%s: rx error: %d buffers out of %d missing\n", + dev->name, num_buf, hdr->mhdr.num_buffers); + dev->stats.rx_length_errors++; + goto err_buf; } if (unlikely(len > MERGE_BUFFER_LEN)) { pr_debug("%s: rx error: merge buffer too long\n", - head_skb->dev->name); + dev->name); len = MERGE_BUFFER_LEN; } + + page = virt_to_head_page(buf); + --rq->num; + + num_skb_frags = skb_shinfo(curr_skb)->nr_frags; if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) { struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC); - if (unlikely(!nskb)) { - head_skb->dev->stats.rx_dropped++; - return -ENOMEM; - } + + if (unlikely(!nskb)) + goto err_skb; if (curr_skb == head_skb) skb_shinfo(curr_skb)->frag_list = nskb; else @@ -341,8 +353,7 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) head_skb->len += len; head_skb->truesize += MERGE_BUFFER_LEN; } - page = virt_to_head_page(buf); - offset = buf - (char *)page_address(page); + offset = buf - page_address(page); if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) { put_page(page); skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1, @@ -351,9 +362,28 @@ static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb) skb_add_rx_frag(curr_skb, num_skb_frags, page, offset, len, MERGE_BUFFER_LEN); } + } + + return head_skb; + +err_skb: + put_page(page); + while (--num_buf) { + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers missing\n", + dev->name, num_buf); + dev->stats.rx_length_errors++; + break; + } + page = virt_to_head_page(buf); + put_page(page); --rq->num; } - return 0; +err_buf: + dev->stats.rx_dropped++; + dev_kfree_skb(head_skb); + return NULL; } static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) @@ -382,19 +412,9 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) len -= sizeof(struct virtio_net_hdr); skb_trim(skb, len); } else if (vi->mergeable_rx_bufs) { - struct page *page = virt_to_head_page(buf); - skb = page_to_skb(rq, page, - (char *)buf - (char *)page_address(page), - len, MERGE_BUFFER_LEN); - if (unlikely(!skb)) { - dev->stats.rx_dropped++; - put_page(page); + skb = receive_mergeable(dev, rq, buf, len); + if (unlikely(!skb)) return; - } - if (receive_mergeable(rq, skb)) { - dev_kfree_skb(skb); - return; - } } else { page = buf; skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); -- cgit v1.2.3 From f121159d72091f25afb22007c833e60a6845e912 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 28 Nov 2013 13:30:59 +0200 Subject: virtio_net: make all RX paths handle erors consistently receive mergeable now handles errors internally. Do same for big and small packet paths, otherwise the logic is too hard to follow. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 54 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 71a2eac7b039..916241d16c67 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -299,6 +299,35 @@ static struct sk_buff *page_to_skb(struct receive_queue *rq, return skb; } +static struct sk_buff *receive_small(void *buf, unsigned int len) +{ + struct sk_buff * skb = buf; + + len -= sizeof(struct virtio_net_hdr); + skb_trim(skb, len); + + return skb; +} + +static struct sk_buff *receive_big(struct net_device *dev, + struct receive_queue *rq, + void *buf, + unsigned int len) +{ + struct page *page = buf; + struct sk_buff *skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); + + if (unlikely(!skb)) + goto err; + + return skb; + +err: + dev->stats.rx_dropped++; + give_pages(rq, page); + return NULL; +} + static struct sk_buff *receive_mergeable(struct net_device *dev, struct receive_queue *rq, void *buf, @@ -392,7 +421,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) struct net_device *dev = vi->dev; struct virtnet_stats *stats = this_cpu_ptr(vi->stats); struct sk_buff *skb; - struct page *page; struct skb_vnet_hdr *hdr; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { @@ -407,23 +435,15 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len) return; } - if (!vi->mergeable_rx_bufs && !vi->big_packets) { - skb = buf; - len -= sizeof(struct virtio_net_hdr); - skb_trim(skb, len); - } else if (vi->mergeable_rx_bufs) { + if (vi->mergeable_rx_bufs) skb = receive_mergeable(dev, rq, buf, len); - if (unlikely(!skb)) - return; - } else { - page = buf; - skb = page_to_skb(rq, page, 0, len, PAGE_SIZE); - if (unlikely(!skb)) { - dev->stats.rx_dropped++; - give_pages(rq, page); - return; - } - } + else if (vi->big_packets) + skb = receive_big(dev, rq, buf, len); + else + skb = receive_small(buf, len); + + if (unlikely(!skb)) + return; hdr = skb_vnet_hdr(skb); -- cgit v1.2.3 From 3868204d6b89ea373a273e760609cb08020beb1a Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Sun, 1 Dec 2013 16:28:48 +0800 Subject: {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") tried to support IPsec ESP transport transformation for pktgen, but acctually this doesn't work at all for two reasons(The orignal transformed packet has bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) - After transpormation, IPv4 header total length needs update, because encrypted payload's length is NOT same as that of plain text. - After transformation, IPv4 checksum needs re-caculate because of payload has been changed. With this patch, armmed pktgen with below cofiguration, Wireshark is able to decrypted ESP packet generated by pktgen without any IPv4 checksum error or auth value error. pgset "flag IPSEC" pgset "flows 1" Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/core/pktgen.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 261357a66300..a797fff7f222 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; -- cgit v1.2.3 From 833846e8fa0c51fb3e47bca8adfdd7b10643b737 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Sun, 1 Dec 2013 14:19:34 +0200 Subject: net/mlx4_en: Remove selftest TX queues empty condition Remove waiting for TX queues to become empty during selftest. This check is not necessary for any purpose, and might put the driver into an infinite loop. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_selftest.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 40626690e8a8..c11d063473e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -140,7 +140,6 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_en_tx_ring *tx_ring; int i, carrier_ok; memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); @@ -150,16 +149,10 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) carrier_ok = netif_carrier_ok(dev); netif_carrier_off(dev); -retry_tx: /* Wait until all tx queues are empty. * there should not be any additional incoming traffic * since we turned the carrier off */ msleep(200); - for (i = 0; i < priv->tx_ring_num && carrier_ok; i++) { - tx_ring = priv->tx_ring[i]; - if (tx_ring->prod != (tx_ring->cons + tx_ring->last_nr_txbb)) - goto retry_tx; - } if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { -- cgit v1.2.3